1 /* Subroutines used for code generation on the Tilera TILE-Gx.
2    Copyright (C) 2011-2016 Free Software Foundation, Inc.
3    Contributed by Walter Lee (walt@tilera.com)
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published
9    by the Free Software Foundation; either version 3, or (at your
10    option) any later version.
11 
12    GCC is distributed in the hope that it will be useful, but WITHOUT
13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15    License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "df.h"
30 #include "tm_p.h"
31 #include "stringpool.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "recog.h"
37 #include "diagnostic.h"
38 #include "output.h"
39 #include "insn-attr.h"
40 #include "alias.h"
41 #include "explow.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "expr.h"
45 #include "langhooks.h"
46 #include "cfgrtl.h"
47 #include "tm-constrs.h"
48 #include "dwarf2.h"
49 #include "fold-const.h"
50 #include "stor-layout.h"
51 #include "gimplify.h"
52 #include "tilegx-builtins.h"
53 #include "tilegx-multiply.h"
54 #include "builtins.h"
55 
56 /* This file should be included last.  */
57 #include "target-def.h"
58 
59 /* SYMBOL_REF for GOT */
60 static GTY(()) rtx g_got_symbol = NULL;
61 
62 /* Report whether we're printing out the first address fragment of a
63    POST_INC or POST_DEC memory reference, from TARGET_PRINT_OPERAND to
64    TARGET_PRINT_OPERAND_ADDRESS.  */
65 static bool output_memory_autoinc_first;
66 
67 
68 
69 /* Option handling  */
70 
71 /* Implement TARGET_OPTION_OVERRIDE.  */
72 static void
tilegx_option_override(void)73 tilegx_option_override (void)
74 {
75   if (global_options_set.x_tilegx_cmodel)
76     {
77       switch (tilegx_cmodel)
78 	{
79 	case CM_SMALL:
80 	case CM_SMALL_PIC:
81 	  if (flag_pic)
82 	    tilegx_cmodel = CM_SMALL_PIC;
83 	  break;
84 
85 	case CM_LARGE:
86 	case CM_LARGE_PIC:
87 	  if (flag_pic)
88 	    tilegx_cmodel = CM_LARGE_PIC;
89 	  break;
90 
91 	default:
92 	  gcc_unreachable ();
93 	}
94     }
95   else
96     tilegx_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
97 
98   /* When modulo scheduling is enabled, we still rely on regular
99      scheduler for bundling.  */
100   if (flag_modulo_sched)
101     flag_resched_modulo_sched = 1;
102 }
103 
104 
105 
106 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.  */
107 static bool
tilegx_scalar_mode_supported_p(machine_mode mode)108 tilegx_scalar_mode_supported_p (machine_mode mode)
109 {
110   switch (mode)
111     {
112     case QImode:
113     case HImode:
114     case SImode:
115     case DImode:
116     case TImode:
117       return true;
118 
119     case SFmode:
120     case DFmode:
121       return true;
122 
123     default:
124       return false;
125     }
126 }
127 
128 
129 /* Implement TARGET_VECTOR_MODE_SUPPORTED_P.  */
130 static bool
tilegx_vector_mode_supported_p(machine_mode mode)131 tilegx_vector_mode_supported_p (machine_mode mode)
132 {
133   return mode == V8QImode || mode == V4HImode || mode == V2SImode;
134 }
135 
136 
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  Unconditionally refuse to
   spill constants to the constant pool; constants are instead
   materialized via move patterns (see tilegx_legitimate_constant_p).  */
static bool
tilegx_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED,
			       rtx x ATTRIBUTE_UNUSED)
{
  return true;
}
144 
145 
146 /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */
147 static bool
tilegx_function_ok_for_sibcall(tree decl,tree exp ATTRIBUTE_UNUSED)148 tilegx_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
149 {
150   return (tilegx_cmodel != CM_LARGE && tilegx_cmodel != CM_LARGE_PIC
151 	  && (decl != NULL));
152 }
153 
154 
155 /* Implement TARGET_PASS_BY_REFERENCE.  Variable sized types are
156    passed by reference.  */
157 static bool
tilegx_pass_by_reference(cumulative_args_t cum ATTRIBUTE_UNUSED,machine_mode mode ATTRIBUTE_UNUSED,const_tree type,bool named ATTRIBUTE_UNUSED)158 tilegx_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
159 			  machine_mode mode ATTRIBUTE_UNUSED,
160 			  const_tree type, bool named ATTRIBUTE_UNUSED)
161 {
162   return (type && TYPE_SIZE (type)
163 	  && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST);
164 }
165 
166 
167 /* Implement TARGET_RETURN_IN_MSB.  We return a value in the most
168    significant part of a register if:
169    - the target is big-endian; and
170    - the value has an aggregate type (e.g., structure or union).  */
171 static bool
tilegx_return_in_msb(const_tree valtype)172 tilegx_return_in_msb (const_tree valtype)
173 {
174   return (TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (valtype));
175 }
176 
177 
178 /* Implement TARGET_RETURN_IN_MEMORY.  */
179 static bool
tilegx_return_in_memory(const_tree type,const_tree fndecl ATTRIBUTE_UNUSED)180 tilegx_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
181 {
182   return !IN_RANGE (int_size_in_bytes (type),
183 		    0, TILEGX_NUM_RETURN_REGS * UNITS_PER_WORD);
184 }
185 
186 
187 /* Implement TARGET_MODE_REP_EXTENDED.  */
188 static int
tilegx_mode_rep_extended(machine_mode mode,machine_mode mode_rep)189 tilegx_mode_rep_extended (machine_mode mode, machine_mode mode_rep)
190 {
191   /* SImode register values are sign-extended to DImode.  */
192   if (mode == SImode && mode_rep == DImode)
193     return SIGN_EXTEND;
194 
195   return UNKNOWN;
196 }
197 
198 
199 /* Implement TARGET_FUNCTION_ARG_BOUNDARY.  */
200 static unsigned int
tilegx_function_arg_boundary(machine_mode mode,const_tree type)201 tilegx_function_arg_boundary (machine_mode mode, const_tree type)
202 {
203   unsigned int alignment;
204 
205   alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
206   if (alignment < PARM_BOUNDARY)
207     alignment = PARM_BOUNDARY;
208   if (alignment > STACK_BOUNDARY)
209     alignment = STACK_BOUNDARY;
210   return alignment;
211 }
212 
213 
/* Implement TARGET_FUNCTION_ARG.  Return the register the next
   argument goes in, or NULL_RTX if it must be passed on the stack.
   CUM here counts argument-register words already consumed.  */
static rtx
tilegx_function_arg (cumulative_args_t cum_v,
		     machine_mode mode,
		     const_tree type, bool named ATTRIBUTE_UNUSED)
{
  /* Work on a local copy; the real CUM is advanced separately by
     tilegx_function_arg_advance.  */
  CUMULATIVE_ARGS cum = *get_cumulative_args (cum_v);
  int byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
  bool doubleword_aligned_p;

  /* All argument registers are already used up.  */
  if (cum >= TILEGX_NUM_ARG_REGS)
    return NULL_RTX;

  /* See whether the argument has doubleword alignment.  */
  doubleword_aligned_p =
    tilegx_function_arg_boundary (mode, type) > BITS_PER_WORD;

  /* Skip one register word if needed to reach an even register pair.  */
  if (doubleword_aligned_p)
    cum += cum & 1;

  /* The ABI does not allow parameters to be passed partially in reg
     and partially in stack.  */
  if ((cum + (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
      > TILEGX_NUM_ARG_REGS)
    return NULL_RTX;

  /* Argument registers are numbered starting at 0, so CUM is also the
     register number of the first word of this argument.  */
  return gen_rtx_REG (mode, cum);
}
243 
244 
/* Implement TARGET_FUNCTION_ARG_ADVANCE.  Advance *CUM past the
   current argument, mirroring the placement logic in
   tilegx_function_arg (alignment padding, no reg/stack splitting).  */
static void
tilegx_function_arg_advance (cumulative_args_t cum_v,
			     machine_mode mode,
			     const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  int byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
  /* Size of the argument rounded up to whole words.  */
  int word_size = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  bool doubleword_aligned_p;

  /* See whether the argument has doubleword alignment.  */
  doubleword_aligned_p =
    tilegx_function_arg_boundary (mode, type) > BITS_PER_WORD;

  /* Pad to an even register number, matching tilegx_function_arg.  */
  if (doubleword_aligned_p)
    *cum += *cum & 1;

  /* If the current argument does not fit in the pretend_args space,
     skip over it.  */
  if (*cum < TILEGX_NUM_ARG_REGS
      && *cum + word_size > TILEGX_NUM_ARG_REGS)
    *cum = TILEGX_NUM_ARG_REGS;

  *cum += word_size;
}
273 
274 
275 /* Implement TARGET_FUNCTION_VALUE.  */
276 static rtx
tilegx_function_value(const_tree valtype,const_tree fn_decl_or_type,bool outgoing ATTRIBUTE_UNUSED)277 tilegx_function_value (const_tree valtype, const_tree fn_decl_or_type,
278 		       bool outgoing ATTRIBUTE_UNUSED)
279 {
280   machine_mode mode;
281   int unsigned_p;
282 
283   mode = TYPE_MODE (valtype);
284   unsigned_p = TYPE_UNSIGNED (valtype);
285 
286   mode = promote_function_mode (valtype, mode, &unsigned_p,
287 				fn_decl_or_type, 1);
288 
289   return gen_rtx_REG (mode, 0);
290 }
291 
292 
/* Implement TARGET_LIBCALL_VALUE.  Library-call results are returned
   in register 0, just like ordinary function values.  */
static rtx
tilegx_libcall_value (machine_mode mode,
		       const_rtx fun ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (mode, 0);
}
300 
301 
302 /* Implement FUNCTION_VALUE_REGNO_P.  */
303 static bool
tilegx_function_value_regno_p(const unsigned int regno)304 tilegx_function_value_regno_p (const unsigned int regno)
305 {
306   return regno < TILEGX_NUM_RETURN_REGS;
307 }
308 
309 
/* Implement TARGET_BUILD_BUILTIN_VA_LIST.  Build the two-field
   va_list record:
     struct __va_list_tag { void *__args; void *__skip; };
   __args is the next-argument cursor and __skip marks the boundary
   between the register-save area and genuine stack arguments (see
   tilegx_va_start / tilegx_gimplify_va_arg_expr).  */
static tree
tilegx_build_builtin_va_list (void)
{
  tree f_args, f_skip, record, type_decl;
  bool owp;

  record = lang_hooks.types.make_type (RECORD_TYPE);

  type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
			  get_identifier ("__va_list_tag"), record);

  f_args = build_decl (BUILTINS_LOCATION, FIELD_DECL,
		       get_identifier ("__args"), ptr_type_node);
  f_skip = build_decl (BUILTINS_LOCATION, FIELD_DECL,
		       get_identifier ("__skip"), ptr_type_node);

  DECL_FIELD_CONTEXT (f_args) = record;

  DECL_FIELD_CONTEXT (f_skip) = record;

  /* Wire up the record: name, field list, and field chain.  */
  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_args;
  TREE_CHAIN (f_args) = f_skip;

  /* We know this is being padded and we want it too.  It is an
     internal type so hide the warnings from the user.  */
  owp = warn_padded;
  warn_padded = false;

  layout_type (record);

  warn_padded = owp;

  /* The correct type is an array type of one element.  */
  return record;
}
348 
349 
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  Initialize the two
   va_list fields: __args points at the first anonymous argument and
   __skip at the boundary past which stack arguments live.  */
static void
tilegx_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  tree f_args, f_skip;
  tree args, skip, t;

  f_args = TYPE_FIELDS (TREE_TYPE (valist));
  f_skip = TREE_CHAIN (f_args);

  /* COMPONENT_REFs for valist.__args and valist.__skip.  */
  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  /* Find the __args area.  Offset back from the incoming-args pointer
     by the argument-register words not consumed by named args
     (crtl->args.info counts consumed words — see
     tilegx_function_arg_advance).  */
  t = make_tree (TREE_TYPE (args), virtual_incoming_args_rtx);
  t = fold_build_pointer_plus_hwi (t,
				   UNITS_PER_WORD *
				   (crtl->args.info - TILEGX_NUM_ARG_REGS));

  /* Account for the pretend-args save area, when one was allocated.  */
  if (crtl->args.pretend_args_size > 0)
    t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);

  t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the __skip area.  */
  t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
  t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
  t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
385 
386 
/* Implement TARGET_SETUP_INCOMING_VARARGS.  Save the anonymous
   argument registers of a varargs function into the pretend-args
   stack area, and report that area's size via *PRETEND_ARGS.  */
static void
tilegx_setup_incoming_varargs (cumulative_args_t cum,
			       machine_mode mode,
			       tree type, int *pretend_args, int no_rtl)
{
  CUMULATIVE_ARGS local_cum = *get_cumulative_args (cum);
  int first_reg;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  targetm.calls.function_arg_advance (pack_cumulative_args (&local_cum),
				      mode, type, true);
  first_reg = local_cum;

  if (local_cum < TILEGX_NUM_ARG_REGS)
    {
      /* One word of pretend space per leftover argument register.  */
      *pretend_args = UNITS_PER_WORD * (TILEGX_NUM_ARG_REGS - first_reg);

      if (!no_rtl)
	{
	  alias_set_type set = get_varargs_alias_set ();
	  /* MEM covering the pretend-args area, placed so the saved
	     registers end right at the incoming-arguments boundary.  */
	  rtx tmp =
	    gen_rtx_MEM (BLKmode, plus_constant (Pmode,
						 virtual_incoming_args_rtx,
						 -STACK_POINTER_OFFSET -
						 UNITS_PER_WORD *
						 (TILEGX_NUM_ARG_REGS -
						  first_reg)));
	  MEM_NOTRAP_P (tmp) = 1;
	  set_mem_alias_set (tmp, set);
	  /* Dump registers first_reg .. TILEGX_NUM_ARG_REGS-1 into it.  */
	  move_block_from_reg (first_reg, tmp,
			       TILEGX_NUM_ARG_REGS - first_reg);
	}
    }
  else
    *pretend_args = 0;
}
426 
427 
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  Gimplify va_arg by updating
   the va_list structure VALIST as required to retrieve an argument of
   type TYPE, and returning that argument.

   ret = va_arg(VALIST, TYPE);

   generates code equivalent to:

    paddedsize = (sizeof(TYPE) + 7) & -8;
    if (  (VALIST.__args + paddedsize > VALIST.__skip)
	& (VALIST.__args <= VALIST.__skip))
      addr = VALIST.__skip + STACK_POINTER_OFFSET;
    else
      addr = VALIST.__args;
    VALIST.__args = addr + paddedsize;
    if (BYTES_BIG_ENDIAN)
      ret = *(TYPE *)(addr + paddedsize - sizeof(TYPE));
    else
      ret = *(TYPE *)addr;
 */
static tree
tilegx_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			     gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree f_args, f_skip;
  tree args, skip;
  HOST_WIDE_INT size, rsize;
  tree addr, tmp;
  bool pass_by_reference_p;

  /* The va_list record fields, in declaration order: __args, __skip.  */
  f_args = TYPE_FIELDS (va_list_type_node);
  f_skip = TREE_CHAIN (f_args);

  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  /* Temporary that will hold the address the argument is read from.  */
  addr = create_tmp_var (ptr_type_node, "va_arg");

  /* If an object is dynamically sized, a pointer to it is passed
     instead of the object itself.  */
  pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
					   false);

  if (pass_by_reference_p)
    type = build_pointer_type (type);

  /* SIZE is the argument's size in bytes, RSIZE that rounded up to
     whole words (the "paddedsize" above).  */
  size = int_size_in_bytes (type);
  rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;

  /* If the alignment of the type is greater than the default for a
     parameter, align to the STACK_BOUNDARY. */
  if (TYPE_ALIGN (type) > PARM_BOUNDARY)
    {
      /* Assert the only case we generate code for: when
	 stack boundary = 2 * parm boundary. */
      gcc_assert (STACK_BOUNDARY == PARM_BOUNDARY * 2);

      /* __args += __args & (PARM_BOUNDARY / 8): adds one parameter
	 slot exactly when __args is misaligned for STACK_BOUNDARY.  */
      tmp = build2 (BIT_AND_EXPR, sizetype,
		    fold_convert (sizetype, unshare_expr (args)),
		    size_int (PARM_BOUNDARY / 8));
      tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
		    unshare_expr (args), tmp);

      gimplify_assign (unshare_expr (args), tmp, pre_p);
    }

  /* Build conditional expression to calculate addr. The expression
     will be gimplified later.  */
  tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
  tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
		build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
		build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
			unshare_expr (skip)));

  /* If the argument would cross __skip, fetch it from beyond the
     register-save area (__skip + STACK_POINTER_OFFSET); otherwise
     read from __args.  */
  tmp = build3 (COND_EXPR, ptr_type_node, tmp,
		build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
			size_int (STACK_POINTER_OFFSET)),
		unshare_expr (args));

  /* Adjust the address of va_arg if it is in big endian mode.  */
  if (BYTES_BIG_ENDIAN && rsize > size)
    tmp = fold_build_pointer_plus_hwi (tmp, rsize - size);
  gimplify_assign (addr, tmp, pre_p);

  /* Update VALIST.__args.  */

  if (BYTES_BIG_ENDIAN && rsize > size)
    tmp = fold_build_pointer_plus_hwi (addr, size);
  else
    tmp = fold_build_pointer_plus_hwi (addr, rsize);
  gimplify_assign (unshare_expr (args), tmp, pre_p);

  addr = fold_convert (build_pointer_type (type), addr);

  /* For pass-by-reference arguments, dereference once more to reach
     the actual object.  */
  if (pass_by_reference_p)
    addr = build_va_arg_indirect_ref (addr);

  return build_va_arg_indirect_ref (addr);
}
529 
530 
531 
/* Implement TARGET_RTX_COSTS.  Estimate the cost of rtx X appearing
   in context OUTER_CODE; write the cost to *TOTAL and return true when
   the estimate is final (subexpressions need not be recursed into).  */
static bool
tilegx_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno,
		  int *total, bool speed)
{
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      /* If this is an 8-bit constant, return zero since it can be
	 used nearly anywhere with no cost.  If it is a valid operand
	 for an ADD or AND, likewise return 0 if we know it will be
	 used in that context.  Otherwise, return 2 since it might be
	 used there later.  All other constants take at least two
	 insns.  */
      if (satisfies_constraint_I (x))
	{
	  *total = 0;
	  return true;
	}
      else if (outer_code == PLUS && add_operand (x, VOIDmode))
	{
	  /* Slightly penalize large constants even though we can add
	     them in one instruction, because it forces the use of
	     2-wide bundling mode.  */
	  *total = 1;
	  return true;
	}
      else if (move_operand (x, SImode))
	{
	  /* We can materialize in one move.  */
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else
	{
	  /* We can materialize in two moves.  */
	  *total = COSTS_N_INSNS (2);
	  return true;
	}

      /* Not reached: every branch above returns.  */
      return false;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (2);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (4);
      return true;

    case HIGH:
      *total = 0;
      return true;

    case MEM:
      /* If outer-code was a sign or zero extension, a cost of
	 COSTS_N_INSNS (1) was already added in, so account for
	 that.  */
      if (outer_code == ZERO_EXTEND || outer_code == SIGN_EXTEND)
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case PLUS:
      /* Convey that shl[123]add are efficient.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && cint_248_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
	{
	  *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode,
			      (enum rtx_code) outer_code, opno, speed)
		    + rtx_cost (XEXP (x, 1), mode,
				(enum rtx_code) outer_code, opno, speed)
		    + COSTS_N_INSNS (1));
	  return true;
	}
      return false;

    case MULT:
      *total = COSTS_N_INSNS (2);
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      /* These are handled by software and are very expensive.  */
      *total = COSTS_N_INSNS (100);
      return false;

    case UNSPEC:
    case UNSPEC_VOLATILE:
      {
	int num = XINT (x, 1);

	/* UNSPEC numbers up to TILEGX_LAST_LATENCY_INSN encode their
	   latency by range; the rest are special-cased below.  */
	if (num <= TILEGX_LAST_LATENCY_1_INSN)
	  *total = COSTS_N_INSNS (1);
	else if (num <= TILEGX_LAST_LATENCY_2_INSN)
	  *total = COSTS_N_INSNS (2);
	else if (num > TILEGX_LAST_LATENCY_INSN)
	  {
	    if (num == UNSPEC_NON_TEMPORAL)
	      {
		/* These are basically loads.  */
		if (outer_code == ZERO_EXTEND || outer_code == SIGN_EXTEND)
		  *total = COSTS_N_INSNS (1);
		else
		  *total = COSTS_N_INSNS (2);
	      }
	    else
	      {
		if (outer_code == PLUS)
		  *total = 0;
		else
		  *total = COSTS_N_INSNS (1);
	      }
	  }
	else
	  {
	    switch (num)
	      {
	      case UNSPEC_BLOCKAGE:
	      case UNSPEC_NETWORK_BARRIER:
	      case UNSPEC_ATOMIC:
		*total = 0;
		break;

	      case UNSPEC_LNK_AND_LABEL:
	      case UNSPEC_MF:
	      case UNSPEC_MOV_PCREL_STEP3:
	      case UNSPEC_NETWORK_RECEIVE:
	      case UNSPEC_NETWORK_SEND:
	      case UNSPEC_SPR_MOVE:
	      case UNSPEC_TLS_GD_ADD:
		*total = COSTS_N_INSNS (1);
		break;

	      case UNSPEC_TLS_IE_LOAD:
	      case UNSPEC_XCHG:
		*total = COSTS_N_INSNS (2);
		break;

	      case UNSPEC_SP_SET:
		*total = COSTS_N_INSNS (3);
		break;

	      case UNSPEC_SP_TEST:
		*total = COSTS_N_INSNS (4);
		break;

	      case UNSPEC_CMPXCHG:
	      case UNSPEC_INSN_CMPEXCH:
	      case UNSPEC_LATENCY_L2:
		*total = COSTS_N_INSNS (11);
		break;

	      case UNSPEC_TLS_GD_CALL:
		*total = COSTS_N_INSNS (30);
		break;

	      case UNSPEC_LATENCY_MISS:
		*total = COSTS_N_INSNS (80);
		break;

	      default:
		*total = COSTS_N_INSNS (1);
	      }
	  }
	return true;
      }

    default:
      return false;
    }
}
711 
712 
713 
714 /* Rtl lowering.  */
715 
716 /* Create a temporary variable to hold a partial result, to enable
717    CSE.  */
718 static rtx
create_temp_reg_if_possible(machine_mode mode,rtx default_reg)719 create_temp_reg_if_possible (machine_mode mode, rtx default_reg)
720 {
721   return can_create_pseudo_p () ? gen_reg_rtx (mode) : default_reg;
722 }
723 
724 
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
tilegx_init_machine_status (void)
{
  /* Allocate a zero-initialized, garbage-collected machine_function.  */
  return ggc_cleared_alloc<machine_function> ();
}
731 
732 
/* Do anything needed before RTL is emitted for each function.  For
   PIC compilations this also sets up the per-function PIC text label
   and GOT registers in cfun->machine.  */
void
tilegx_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function
     status.  */
  init_machine_status = tilegx_init_machine_status;

  if (cfun && cfun->machine && flag_pic)
    {
      /* Counter making each function's PIC link label unique.  */
      static int label_num = 0;

      char text_label_name[32];

      struct machine_function *machine = cfun->machine;

      ASM_GENERATE_INTERNAL_LABEL (text_label_name, "L_PICLNK", label_num++);

      /* SYMBOL_REF for the label; ggc_strdup keeps the name alive
	 across garbage collections.  */
      machine->text_label_symbol =
	gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (text_label_name));

      machine->text_label_rtx =
	gen_rtx_REG (Pmode, TILEGX_PIC_TEXT_LABEL_REGNUM);

      machine->got_rtx = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);

      machine->calls_tls_get_addr = false;
    }
}
762 
763 
/* Implement TARGET_EXPAND_TO_RTL_HOOK.  Called just before a function
   is expanded to RTL.  */
static void
tilegx_expand_to_rtl_hook (void)
{
  /* Exclude earlier sets of crtl->uses_pic_offset_table, because we
     only care about uses actually emitted.  */
  crtl->uses_pic_offset_table = 0;
}
772 
773 
774 /* Implement TARGET_SHIFT_TRUNCATION_MASK.  DImode shifts use the mode
775    matching insns and therefore guarantee that the shift count is
776    modulo 64.  SImode shifts sometimes use the 64 bit version so do
777    not hold such guarantee.  */
778 static unsigned HOST_WIDE_INT
tilegx_shift_truncation_mask(machine_mode mode)779 tilegx_shift_truncation_mask (machine_mode mode)
780 {
781   return mode == DImode ? 63 : 0;
782 }
783 
784 
/* Implement TARGET_INIT_LIBFUNCS.  Register the SImode division and
   modulus libcall names.  */
static void
tilegx_init_libfuncs (void)
{
  /* We need to explicitly generate these libfunc's to support
     conversion of divide by constant to multiply (the divide stubs in
     tilegx.md exist also for this reason).  Normally we'd expect gcc
     to lazily generate them when they are needed, but for some reason
     it's set up to only generate them if the mode is the word
     mode.  */
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");
}
800 
801 
802 /* Return true if X contains a thread-local symbol.  */
803 static bool
tilegx_tls_referenced_p(rtx x)804 tilegx_tls_referenced_p (rtx x)
805 {
806   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
807     x = XEXP (XEXP (x, 0), 0);
808 
809   if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
810     return true;
811 
812   /* That's all we handle in tilegx_legitimize_tls_address for
813      now.  */
814   return false;
815 }
816 
817 
818 /* Return true if X requires a scratch register.  It is given that
819    flag_pic is on and that X satisfies CONSTANT_P.  */
820 static int
tilegx_pic_address_needs_scratch(rtx x)821 tilegx_pic_address_needs_scratch (rtx x)
822 {
823   if (GET_CODE (x) == CONST
824       && GET_CODE (XEXP (x, 0)) == PLUS
825       && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
826 	  || GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF)
827       && (CONST_INT_P (XEXP (XEXP (x, 0), 1))))
828     return true;
829 
830   return false;
831 }
832 
833 
834 /* Implement TARGET_LEGITIMATE_CONSTANT_P.  This is all constants for
835    which we are willing to load the value into a register via a move
836    pattern.  TLS cannot be treated as a constant because it can
837    include a function call.  */
838 static bool
tilegx_legitimate_constant_p(machine_mode mode ATTRIBUTE_UNUSED,rtx x)839 tilegx_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
840 {
841   switch (GET_CODE (x))
842     {
843     case CONST:
844     case SYMBOL_REF:
845       return !tilegx_tls_referenced_p (x);
846 
847     default:
848       return true;
849     }
850 }
851 
852 
853 /* Return true if the constant value X is a legitimate general operand
854    when generating PIC code.  It is given that flag_pic is on and that
855    X satisfies CONSTANT_P.  */
856 bool
tilegx_legitimate_pic_operand_p(rtx x)857 tilegx_legitimate_pic_operand_p (rtx x)
858 {
859   if (tilegx_pic_address_needs_scratch (x))
860     return false;
861 
862   if (tilegx_tls_referenced_p (x))
863     return false;
864 
865   return true;
866 }
867 
868 
/* Return true if the rtx X can be used as an address operand.  Valid
   addresses are a plain register, or POST_INC / POST_DEC /
   POST_MODIFY of a register (the auto-modify forms only for
   word-sized-or-smaller accesses).  */
static bool
tilegx_legitimate_address_p (machine_mode ARG_UNUSED (mode), rtx x,
			     bool strict)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  switch (GET_CODE (x))
    {
    case POST_INC:
    case POST_DEC:
      /* Auto-modify only works for accesses up to one word.  */
      if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	return false;

      x = XEXP (x, 0);
      break;

    case POST_MODIFY:
      if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	return false;

      /* The modification must be (plus base increment) ...  */
      if (GET_CODE (XEXP (x, 1)) != PLUS)
	return false;

      /* ... with the same base register as the address itself ...  */
      if (!rtx_equal_p (XEXP (x, 0), XEXP (XEXP (x, 1), 0)))
	return false;

      /* ... and an 8-bit signed immediate increment.  */
      if (!satisfies_constraint_I (XEXP (XEXP (x, 1), 1)))
	return false;

      x = XEXP (x, 0);
      break;

    case REG:
      break;

    default:
      return false;
    }

  /* Check if x is a valid reg.  */
  if (!REG_P (x))
    return false;

  /* Under strict checking, the base register must be one that is
     actually OK for addressing (no unallocated pseudos).  */
  if (strict)
    return REGNO_OK_FOR_BASE_P (REGNO (x));
  else
    return true;
}
919 
920 
/* Return the rtx containing SYMBOL_REF to the text label.  Set up by
   tilegx_init_expanders for PIC compilations.  */
static rtx
tilegx_text_label_symbol (void)
{
  return cfun->machine->text_label_symbol;
}
927 
928 
/* Return the register storing the value of the text label.  Set up by
   tilegx_init_expanders for PIC compilations.  */
static rtx
tilegx_text_label_rtx (void)
{
  return cfun->machine->text_label_rtx;
}
935 
936 
/* Return the register storing the value of the global offset
   table.  Set up by tilegx_init_expanders for PIC compilations.  */
static rtx
tilegx_got_rtx (void)
{
  return cfun->machine->got_rtx;
}
944 
945 
946 /* Return the SYMBOL_REF for _GLOBAL_OFFSET_TABLE_.  */
947 static rtx
tilegx_got_symbol(void)948 tilegx_got_symbol (void)
949 {
950   if (g_got_symbol == NULL)
951     g_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
952 
953   return g_got_symbol;
954 }
955 
956 
957 /* Return a reference to the got to be used by tls references.  */
958 static rtx
tilegx_tls_got(void)959 tilegx_tls_got (void)
960 {
961   rtx temp;
962   if (flag_pic)
963     {
964       crtl->uses_pic_offset_table = 1;
965       return tilegx_got_rtx ();
966     }
967 
968   temp = gen_reg_rtx (Pmode);
969   emit_move_insn (temp, tilegx_got_symbol ());
970 
971   return temp;
972 }
973 
974 
975 /* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
976    this (thread-local) address.  */
977 static rtx
tilegx_legitimize_tls_address(rtx addr)978 tilegx_legitimize_tls_address (rtx addr)
979 {
980   rtx ret;
981 
982   gcc_assert (can_create_pseudo_p ());
983 
984   if (GET_CODE (addr) == SYMBOL_REF)
985     switch (SYMBOL_REF_TLS_MODEL (addr))
986       {
987       case TLS_MODEL_GLOBAL_DYNAMIC:
988       case TLS_MODEL_LOCAL_DYNAMIC:
989 	{
990 	  rtx r0, temp, temp2, temp3, got, last;
991 
992 	  ret = gen_reg_rtx (Pmode);
993 	  r0 = gen_rtx_REG (Pmode, 0);
994 	  temp = gen_reg_rtx (Pmode);
995 	  temp2 = gen_reg_rtx (Pmode);
996 	  temp3 = gen_reg_rtx (Pmode);
997 
998 	  got = tilegx_tls_got ();
999 	  if (TARGET_32BIT)
1000 	    {
1001 	      emit_insn (gen_mov_tls_gd_step1_32bit (temp, addr));
1002 	      emit_insn (gen_mov_tls_gd_step2_32bit (temp2, temp, addr));
1003 	      emit_insn (gen_tls_add_32bit (temp2, got, temp2, addr));
1004 	    }
1005 	  else
1006 	    {
1007 	      emit_insn (gen_mov_tls_gd_step1 (temp, addr));
1008 	      emit_insn (gen_mov_tls_gd_step2 (temp2, temp, addr));
1009 	      emit_insn (gen_tls_add (temp2, got, temp2, addr));
1010 	    }
1011 
1012 	  emit_move_insn (r0, temp2);
1013 
1014 	  if (TARGET_32BIT)
1015 	    {
1016 	      emit_insn (gen_tls_gd_call_32bit (addr));
1017 	    }
1018 	  else
1019 	    {
1020 	      emit_insn (gen_tls_gd_call (addr));
1021 	    }
1022 
1023 	  emit_move_insn (temp3, r0);
1024 
1025 	  if (TARGET_32BIT)
1026 	    last = emit_insn (gen_tls_gd_add_32bit (ret, temp3, addr));
1027 	  else
1028 	    last = emit_insn (gen_tls_gd_add (ret, temp3, addr));
1029 
1030 	  set_unique_reg_note (last, REG_EQUAL, copy_rtx (addr));
1031 	  break;
1032 	}
1033       case TLS_MODEL_INITIAL_EXEC:
1034 	{
1035 	  rtx temp, temp2, temp3, got;
1036 	  rtx_insn *last;
1037 
1038 	  ret = gen_reg_rtx (Pmode);
1039 	  temp = gen_reg_rtx (Pmode);
1040 	  temp2 = gen_reg_rtx (Pmode);
1041 	  temp3 = gen_reg_rtx (Pmode);
1042 
1043 	  got = tilegx_tls_got ();
1044 	  if (TARGET_32BIT)
1045 	    {
1046 	      emit_insn (gen_mov_tls_ie_step1_32bit (temp, addr));
1047 	      emit_insn (gen_mov_tls_ie_step2_32bit (temp2, temp, addr));
1048 	      emit_insn (gen_tls_add_32bit (temp2, got, temp2, addr));
1049 	      emit_insn (gen_tls_ie_load_32bit (temp3, temp2, addr));
1050 	    }
1051 	  else
1052 	    {
1053 	      emit_insn (gen_mov_tls_ie_step1 (temp, addr));
1054 	      emit_insn (gen_mov_tls_ie_step2 (temp2, temp, addr));
1055 	      emit_insn (gen_tls_add (temp2, got, temp2, addr));
1056 	      emit_insn (gen_tls_ie_load (temp3, temp2, addr));
1057 	    }
1058 
1059 	  last =
1060 	    emit_move_insn(ret,
1061 			   gen_rtx_PLUS (Pmode,
1062 					 gen_rtx_REG (Pmode,
1063 						      THREAD_POINTER_REGNUM),
1064 					 temp3));
1065 	  set_unique_reg_note (last, REG_EQUAL, copy_rtx (addr));
1066 	  break;
1067 	}
1068       case TLS_MODEL_LOCAL_EXEC:
1069 	{
1070 	  rtx temp, temp2;
1071 	  rtx_insn *last;
1072 
1073 	  ret = gen_reg_rtx (Pmode);
1074 	  temp = gen_reg_rtx (Pmode);
1075 	  temp2 = gen_reg_rtx (Pmode);
1076 
1077 	  if (TARGET_32BIT)
1078 	    {
1079 	      emit_insn (gen_mov_tls_le_step1_32bit (temp, addr));
1080 	      emit_insn (gen_mov_tls_le_step2_32bit (temp2, temp, addr));
1081 	    }
1082 	  else
1083 	    {
1084 	      emit_insn (gen_mov_tls_le_step1 (temp, addr));
1085 	      emit_insn (gen_mov_tls_le_step2 (temp2, temp, addr));
1086 	    }
1087 
1088 	  last =
1089 	    emit_move_insn (ret,
1090 			    gen_rtx_PLUS (Pmode,
1091 					  gen_rtx_REG (Pmode,
1092 						       THREAD_POINTER_REGNUM),
1093 					  temp2));
1094 	  set_unique_reg_note (last, REG_EQUAL, copy_rtx (addr));
1095 	  break;
1096 	}
1097       default:
1098 	gcc_unreachable ();
1099       }
1100   else if (GET_CODE (addr) == CONST)
1101     {
1102       rtx base, offset;
1103 
1104       gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
1105 
1106       base = tilegx_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
1107       offset = XEXP (XEXP (addr, 0), 1);
1108 
1109       base = force_operand (base, NULL_RTX);
1110       ret = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, offset));
1111     }
1112   else
1113     gcc_unreachable ();
1114 
1115   return ret;
1116 }
1117 
1118 
/* Compute into RESULT the address of ADDR, a symbolic address, by
   computing it relative to tilegx_text_label_symbol (i.e. as a
   PC-relative value, suitable for position-independent code).  RESULT
   must already be a register.  */
void
tilegx_compute_pcrel_address (rtx result, rtx addr)
{
  rtx text_label_symbol = tilegx_text_label_symbol ();
  rtx text_label_rtx = tilegx_text_label_rtx ();
  rtx temp, temp2, temp3;

  temp = create_temp_reg_if_possible (Pmode, result);
  temp2 = create_temp_reg_if_possible (Pmode, result);

  if (TARGET_32BIT)
    {
      /* 32-bit: two immediate-building steps plus a final step that
	 adds in the text label register.  */
      emit_insn (gen_mov_pcrel_step1_32bit (temp, addr, text_label_symbol));
      emit_insn (gen_mov_pcrel_step2_32bit (temp2, temp, addr,
					    text_label_symbol));
      emit_insn (gen_mov_pcrel_step3_32bit (result, temp2,
					    text_label_rtx,
					    addr, text_label_symbol));
    }
  else if (tilegx_cmodel == CM_LARGE_PIC)
    {
      /* Large PIC code model: a wider offset needs one extra
	 immediate-building step.  */
      temp3 = create_temp_reg_if_possible (Pmode, result);
      emit_insn (gen_mov_large_pcrel_step1 (temp, addr, text_label_symbol));
      emit_insn (gen_mov_large_pcrel_step2 (temp2, temp, addr,
					    text_label_symbol));
      emit_insn (gen_mov_large_pcrel_step3 (temp3, temp2, addr,
					    text_label_symbol));
      emit_insn (gen_mov_large_pcrel_step4 (result, temp3,
					    text_label_rtx,
					    addr, text_label_symbol));
    }
  else
    {
      /* 64-bit, small code model.  */
      emit_insn (gen_mov_pcrel_step1 (temp, addr, text_label_symbol));
      emit_insn (gen_mov_pcrel_step2 (temp2, temp, addr, text_label_symbol));
      emit_insn (gen_mov_pcrel_step3 (result, temp2,
				      text_label_rtx,
				      addr, text_label_symbol));
    }
}
1161 
1162 
1163 /* Returns a register that points to the plt entry of ADDR, a symbolic
1164    address, by computing its address relative to
1165    tilegx_text_label_symbol.  */
1166 void
tilegx_compute_pcrel_plt_address(rtx result,rtx addr)1167 tilegx_compute_pcrel_plt_address (rtx result, rtx addr)
1168 {
1169   rtx text_label_symbol = tilegx_text_label_symbol ();
1170   rtx text_label_rtx = tilegx_text_label_rtx ();
1171   rtx temp, temp2, temp3;
1172 
1173   temp = create_temp_reg_if_possible (Pmode, result);
1174   temp2 = create_temp_reg_if_possible (Pmode, result);
1175 
1176   if (TARGET_32BIT)
1177     {
1178       emit_insn (gen_mov_plt_pcrel_step1_32bit (temp, addr,
1179 						text_label_symbol));
1180       emit_insn (gen_mov_plt_pcrel_step2_32bit (temp2, temp, addr,
1181 						text_label_symbol));
1182       emit_move_insn (result, gen_rtx_PLUS (Pmode, temp2, text_label_rtx));
1183     }
1184   else
1185     {
1186       temp3 = create_temp_reg_if_possible (Pmode, result);
1187 
1188       emit_insn (gen_mov_plt_pcrel_step1 (temp, addr, text_label_symbol));
1189       emit_insn (gen_mov_plt_pcrel_step2 (temp2, temp, addr,
1190 					  text_label_symbol));
1191       emit_insn (gen_mov_plt_pcrel_step3 (temp3, temp2, addr,
1192 					  text_label_symbol));
1193       emit_move_insn (result, gen_rtx_PLUS (Pmode, temp3, text_label_rtx));
1194     }
1195 }
1196 
1197 
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go into a reg.  This is REG if
   nonzero, otherwise we allocate register(s) as necessary.

   Handles SYMBOL_REF (local via pc-relative address, global via the
   GOT), CONST (PLUS symbol offset, legitimized piecewise), and
   LABEL_REF (pc-relative).  Anything else is returned unchanged.  */
static rtx
tilegx_legitimize_pic_address (rtx orig,
			       machine_mode mode ATTRIBUTE_UNUSED,
			       rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx address, pic_ref;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      if (SYMBOL_REF_LOCAL_P (orig))
	{
	  /* If not during reload, allocate another temp reg here for
	     loading in the address, so that these instructions can be
	     optimized properly.  */
	  rtx temp_reg = create_temp_reg_if_possible (Pmode, reg);
	  tilegx_compute_pcrel_address (temp_reg, orig);

	  /* Note: this is conservative.  We use the text_label but we
	     don't use the pic_offset_table.  However, in some cases
	     we may need the pic_offset_table (see
	     tilegx_fixup_pcrel_references).  */
	  crtl->uses_pic_offset_table = 1;

	  address = temp_reg;

	  emit_move_insn (reg, address);
	  return reg;
	}
      else
	{
	  /* Global symbol: load its address from the GOT.  */
	  /* If not during reload, allocate another temp reg here for
	     loading in the address, so that these instructions can be
	     optimized properly.  */
	  rtx temp_reg = create_temp_reg_if_possible (Pmode, reg);

	  gcc_assert (flag_pic);
	  if (flag_pic == 1)
	    {
	      /* -fpic: the GOT slot is reachable with a single 16-bit
		 offset (the add_got16 pattern).  */
	      if (TARGET_32BIT)
		{
		  emit_insn (gen_add_got16_32bit (temp_reg,
						  tilegx_got_rtx (),
						  orig));
		}
	      else
		{
		  emit_insn (gen_add_got16 (temp_reg,
					    tilegx_got_rtx (), orig));
		}
	    }
	  else
	    {
	      /* -fPIC: build a 32-bit GOT offset in two steps, then
		 add it to the GOT pointer.  */
	      rtx temp_reg2 = create_temp_reg_if_possible (Pmode, reg);
	      rtx temp_reg3 = create_temp_reg_if_possible (Pmode, reg);
	      if (TARGET_32BIT)
		{
		  emit_insn (gen_mov_got32_step1_32bit (temp_reg3, orig));
		  emit_insn (gen_mov_got32_step2_32bit
			     (temp_reg2, temp_reg3, orig));
		}
	      else
		{
		  emit_insn (gen_mov_got32_step1 (temp_reg3, orig));
		  emit_insn (gen_mov_got32_step2 (temp_reg2, temp_reg3,
						  orig));
		}
	      emit_move_insn (temp_reg,
			      gen_rtx_PLUS (Pmode,
					    tilegx_got_rtx (), temp_reg2));
	    }

	  address = temp_reg;

	  pic_ref = gen_const_mem (Pmode, address);
	  crtl->uses_pic_offset_table = 1;
	  emit_move_insn (reg, pic_ref);
	  /* The following put a REG_EQUAL note on this insn, so that
	     it can be optimized by loop.  But it causes the label to
	     be optimized away.  */
	  /* set_unique_reg_note (insn, REG_EQUAL, orig); */
	  return reg;
	}
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      /* Already a GOT-relative expression; nothing to do.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == tilegx_got_rtx ())
	return orig;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
      base = tilegx_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
					    Pmode, reg);
      offset = tilegx_legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
					      base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
	{
	  if (can_create_pseudo_p ())
	    offset = force_reg (Pmode, offset);
	  else
	    /* If we reach here, then something is seriously wrong.  */
	    gcc_unreachable ();
	}

      if (can_create_pseudo_p ())
	return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, offset));
      else
	gcc_unreachable ();
    }
  else if (GET_CODE (orig) == LABEL_REF)
    {
      rtx address;
      rtx temp_reg;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* If not during reload, allocate another temp reg here for
	 loading in the address, so that these instructions can be
	 optimized properly.  */
      temp_reg = create_temp_reg_if_possible (Pmode, reg);
      tilegx_compute_pcrel_address (temp_reg, orig);

      /* Note: this is conservative.  We use the text_label but we
	 don't use the pic_offset_table.  */
      crtl->uses_pic_offset_table = 1;

      address = temp_reg;

      emit_move_insn (reg, address);

      return reg;
    }

  return orig;
}
1355 
1356 
1357 /* Implement TARGET_LEGITIMIZE_ADDRESS.  */
1358 static rtx
tilegx_legitimize_address(rtx x,rtx oldx ATTRIBUTE_UNUSED,machine_mode mode)1359 tilegx_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1360 			   machine_mode mode)
1361 {
1362   if (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
1363       && symbolic_operand (x, Pmode) && tilegx_tls_referenced_p (x))
1364     {
1365       return tilegx_legitimize_tls_address (x);
1366     }
1367   else if (flag_pic)
1368     {
1369       return tilegx_legitimize_pic_address (x, mode, 0);
1370     }
1371   else
1372     return x;
1373 }
1374 
1375 
1376 /* Implement TARGET_DELEGITIMIZE_ADDRESS.  */
1377 static rtx
tilegx_delegitimize_address(rtx x)1378 tilegx_delegitimize_address (rtx x)
1379 {
1380   x = delegitimize_mem_from_attrs (x);
1381 
1382   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
1383     {
1384       switch (XINT (XEXP (x, 0), 1))
1385 	{
1386 	  case UNSPEC_HW0:
1387 	  case UNSPEC_HW1:
1388 	  case UNSPEC_HW2:
1389 	  case UNSPEC_HW3:
1390 	  case UNSPEC_HW0_LAST:
1391 	  case UNSPEC_HW1_LAST:
1392 	  case UNSPEC_HW2_LAST:
1393 	  case UNSPEC_HW0_PCREL:
1394 	  case UNSPEC_HW1_PCREL:
1395 	  case UNSPEC_HW1_LAST_PCREL:
1396 	  case UNSPEC_HW2_LAST_PCREL:
1397 	  case UNSPEC_HW0_PLT_PCREL:
1398 	  case UNSPEC_HW1_PLT_PCREL:
1399 	  case UNSPEC_HW1_LAST_PLT_PCREL:
1400 	  case UNSPEC_HW2_LAST_PLT_PCREL:
1401 	  case UNSPEC_HW0_GOT:
1402 	  case UNSPEC_HW0_LAST_GOT:
1403   	  case UNSPEC_HW1_LAST_GOT:
1404   	  case UNSPEC_HW0_TLS_GD:
1405   	  case UNSPEC_HW1_LAST_TLS_GD:
1406   	  case UNSPEC_HW0_TLS_IE:
1407   	  case UNSPEC_HW1_LAST_TLS_IE:
1408   	  case UNSPEC_HW0_TLS_LE:
1409   	  case UNSPEC_HW1_LAST_TLS_LE:
1410 	    x = XVECEXP (XEXP (x, 0), 0, 0);
1411 	  break;
1412 	}
1413     }
1414 
1415   return x;
1416 }
1417 
1418 
1419 /* Emit code to load the PIC register.  */
1420 static void
load_pic_register(bool delay_pic_helper ATTRIBUTE_UNUSED)1421 load_pic_register (bool delay_pic_helper ATTRIBUTE_UNUSED)
1422 {
1423   int orig_flag_pic = flag_pic;
1424 
1425   rtx got_symbol = tilegx_got_symbol ();
1426   rtx text_label_symbol = tilegx_text_label_symbol ();
1427   rtx text_label_rtx = tilegx_text_label_rtx ();
1428   flag_pic = 0;
1429 
1430   if (TARGET_32BIT)
1431     {
1432       emit_insn (gen_insn_lnk_and_label_32bit (text_label_rtx,
1433 					       text_label_symbol));
1434     }
1435   else
1436     {
1437       emit_insn (gen_insn_lnk_and_label (text_label_rtx, text_label_symbol));
1438     }
1439 
1440   tilegx_compute_pcrel_address (tilegx_got_rtx (), got_symbol);
1441 
1442   flag_pic = orig_flag_pic;
1443 
1444   /* Need to emit this whether or not we obey regdecls, since
1445      setjmp/longjmp can cause life info to screw up.  ??? In the case
1446      where we don't obey regdecls, this is not sufficient since we may
1447      not fall out the bottom.  */
1448   emit_use (tilegx_got_rtx ());
1449 }
1450 
1451 
1452 /* Return the simd variant of the constant NUM of mode MODE, by
1453    replicating it to fill an interger of mode DImode.  NUM is first
1454    truncated to fit in MODE.  */
1455 rtx
tilegx_simd_int(rtx num,machine_mode mode)1456 tilegx_simd_int (rtx num, machine_mode mode)
1457 {
1458   HOST_WIDE_INT n = 0;
1459 
1460   gcc_assert (CONST_INT_P (num));
1461 
1462   n = INTVAL (num);
1463 
1464   switch (mode)
1465     {
1466     case QImode:
1467       n = 0x0101010101010101LL * (n & 0x000000FF);
1468       break;
1469     case HImode:
1470       n = 0x0001000100010001LL * (n & 0x0000FFFF);
1471       break;
1472     case SImode:
1473       n = 0x0000000100000001LL * (n & 0xFFFFFFFF);
1474       break;
1475     case DImode:
1476       break;
1477     default:
1478       gcc_unreachable ();
1479     }
1480 
1481   return GEN_INT (n);
1482 }
1483 
1484 
1485 /* Returns true iff VAL can be moved into a register in one
1486    instruction.  And if it can, it emits the code to move the constant
1487    into DEST_REG.
1488 
1489    If THREE_WIDE_ONLY is true, this insists on an instruction that
1490    works in a bundle containing three instructions.  */
1491 static bool
expand_set_cint64_one_inst(rtx dest_reg,HOST_WIDE_INT val,bool three_wide_only)1492 expand_set_cint64_one_inst (rtx dest_reg,
1493 			    HOST_WIDE_INT val, bool three_wide_only)
1494 {
1495   if (val == trunc_int_for_mode (val, QImode))
1496     {
1497       /* Success! */
1498       emit_move_insn (dest_reg, GEN_INT (val));
1499       return true;
1500     }
1501   else if (!three_wide_only)
1502     {
1503       /* Test for the following constraints: J, K, N, P.  We avoid
1504 	 generating an rtx and using existing predicates because we
1505 	 can be testing and rejecting a lot of constants, and GEN_INT
1506 	 is O(N).  */
1507       if ((val >= -32768 && val <= 65535)
1508 	  || ((val == (val & 0xFF) * 0x0101010101010101LL))
1509 	  || (val == ((trunc_int_for_mode (val, QImode) & 0xFFFF)
1510 		      * 0x0001000100010001LL)))
1511 	{
1512 	  emit_move_insn (dest_reg, GEN_INT (val));
1513 	  return true;
1514 	}
1515     }
1516 
1517   return false;
1518 }
1519 
1520 
1521 /* Implement DImode rotatert.  */
1522 static HOST_WIDE_INT
rotate_right(HOST_WIDE_INT n,int count)1523 rotate_right (HOST_WIDE_INT n, int count)
1524 {
1525   unsigned HOST_WIDE_INT x = n & 0xFFFFFFFFFFFFFFFFULL;
1526   if (count == 0)
1527     return x;
1528   return ((x >> count) | (x << (64 - count))) & 0xFFFFFFFFFFFFFFFFULL;
1529 }
1530 
1531 
1532 /* Return true iff n contains exactly one contiguous sequence of 1
1533    bits, possibly wrapping around from high bits to low bits.  */
1534 bool
tilegx_bitfield_operand_p(HOST_WIDE_INT n,int * first_bit,int * last_bit)1535 tilegx_bitfield_operand_p (HOST_WIDE_INT n, int *first_bit, int *last_bit)
1536 {
1537   int i;
1538 
1539   if (n == 0)
1540     return false;
1541 
1542   for (i = 0; i < 64; i++)
1543     {
1544       unsigned HOST_WIDE_INT x = rotate_right (n, i);
1545       if (!(x & 1))
1546 	continue;
1547 
1548       /* See if x is a power of two minus one, i.e. only consecutive 1
1549 	 bits starting from bit 0.  */
1550       if ((x & (x + 1)) == 0)
1551 	{
1552 	  if (first_bit != NULL)
1553 	    *first_bit = i;
1554 	  if (last_bit != NULL)
1555 	    *last_bit = (i + exact_log2 (x ^ (x >> 1))) & 63;
1556 
1557 	  return true;
1558 	}
1559     }
1560 
1561   return false;
1562 }
1563 
1564 
/* Create code to move the CONST_INT value in src_val to dest_reg.
   The value is first truncated to the mode of DEST_REG.  Tries, in
   order: a single instruction; one instruction plus one
   shift/rotate; and finally a moveli/shl16insli chain, shortened
   where a run of >= 16 aligned zero bits allows a plain shift.  */
static void
expand_set_cint64 (rtx dest_reg, rtx src_val)
{
  HOST_WIDE_INT val;
  int leading_zeroes, trailing_zeroes;
  int three_wide_only;
  int shift, ins_shift, zero_cluster_shift;
  rtx temp, subreg;

  gcc_assert (CONST_INT_P (src_val));
  val = trunc_int_for_mode (INTVAL (src_val), GET_MODE (dest_reg));

  /* See if we can generate the constant in one instruction.  */
  if (expand_set_cint64_one_inst (dest_reg, val, false))
    return;

  /* Force the destination to DImode so we can use DImode instructions
     to create it.  This both allows instructions like rotl, and
     certain efficient 3-wide instructions.  */
  subreg = simplify_gen_subreg (DImode, dest_reg, GET_MODE (dest_reg), 0);
  gcc_assert (subreg != NULL);
  dest_reg = subreg;

  temp = create_temp_reg_if_possible (DImode, dest_reg);

  leading_zeroes = 63 - floor_log2 (val & 0xFFFFFFFFFFFFFFFFULL);
  trailing_zeroes = exact_log2 (val & -val);

  /* First try all three-wide instructions that generate a constant
     (i.e. movei) followed by various shifts and rotates. If none of
     those work, try various two-wide ways of generating a constant
     followed by various shifts and rotates.  */
  for (three_wide_only = 1; three_wide_only >= 0; three_wide_only--)
    {
      int count;

      if (expand_set_cint64_one_inst (temp, val >> trailing_zeroes,
				      three_wide_only))
	{
	  /* 0xFFFFFFFFFFFFA500 becomes:
	     movei temp, 0xFFFFFFFFFFFFFFA5
	     shli dest, temp, 8  */
	  emit_move_insn (dest_reg,
			  gen_rtx_ASHIFT (DImode, temp,
					  GEN_INT (trailing_zeroes)));
	  return;
	}

      if (expand_set_cint64_one_inst (temp, val << leading_zeroes,
				      three_wide_only))
	{
	  /* 0x7FFFFFFFFFFFFFFF becomes:
	     movei temp, -2
	     shrui dest, temp, 1  */
	  emit_move_insn (dest_reg,
			  gen_rtx_LSHIFTRT (DImode, temp,
					    GEN_INT (leading_zeroes)));
	  return;
	}

      /* Try rotating a one-instruction immediate.  */
      for (count = 1; count < 64; count++)
	{
	  HOST_WIDE_INT r = rotate_right (val, count);
	  if (expand_set_cint64_one_inst (temp, r, three_wide_only))
	    {
	      /* 0xFFFFFFFFFFA5FFFF becomes:
		 movei temp, 0xFFFFFFFFFFFFFFA5
		 rotli dest, temp, 16  */
	      emit_move_insn (dest_reg,
			      gen_rtx_ROTATE (DImode, temp, GEN_INT (count)));
	      return;
	    }
	}
    }

  /* There are two cases here to produce a large constant.
     In the most general case, we do this:

     moveli x, hw3(NUM)
     shl16insli x, x, hw2(NUM)
     shl16insli x, x, hw1(NUM)
     shl16insli x, x, hw0(NUM)

     However, we can sometimes do better.  shl16insli is a poor way to
     insert 16 zero bits, because simply shifting left by 16 has more
     bundling freedom.  So if we see any contiguous aligned sequence
     of 16 or more zero bits (below the highest set bit), it is always
     more efficient to materialize the bits above the zero bits, then
     left shift to put in the zeroes, then insert whatever bits
     remain.  For example, we might end up with:

     movei x, NUM >> (37 + 16)
     shli x, x, 37
     shl16insli x, x, hw0(NUM)      */

  zero_cluster_shift = -1;

  /* Scan 16-bit-aligned groups for the lowest run of >= 16 zero
     bits (below the highest set bit).  */
  for (shift = 0; shift < 48 - leading_zeroes; shift += 16)
    {
      HOST_WIDE_INT x = val >> shift;

      /* Find the least significant group of 16 aligned zero bits.  */
      if ((x & 0xFFFF) == 0x0000)
	{
	  /* Grab any following zero bits as well.  */
	  zero_cluster_shift = exact_log2 (x & -x);
	  shift += zero_cluster_shift;
	  break;
	}
    }

  if (zero_cluster_shift >= 0)
    {
      unsigned HOST_WIDE_INT leftover;

      /* Recursively create the constant above the lowest 16 zero
	 bits.  */
      expand_set_cint64 (temp, GEN_INT (val >> shift));

      /* See if we can easily insert the remaining bits, or if we need
	 to fall through to the more general case.  */
      leftover = val - ((val >> shift) << shift);
      if (leftover == 0)
	{
	  /* A simple left shift is enough.  */
	  emit_move_insn (dest_reg,
			  gen_rtx_ASHIFT (DImode, temp, GEN_INT (shift)));
	  return;
	}
      else if (leftover <= 32767)
	{
	  /* Left shift into position then add in the leftover.  */
	  rtx temp2 = create_temp_reg_if_possible (DImode, temp);
	  emit_move_insn (temp2,
			  gen_rtx_ASHIFT (DImode, temp, GEN_INT (shift)));
	  emit_move_insn (dest_reg,
			  gen_rtx_PLUS (DImode, temp2, GEN_INT (leftover)));
	  return;
	}
      else
	{
	  /* Shift in the batch of >= 16 zeroes we detected earlier.
	     After this, shift will be aligned mod 16 so the final
	     loop can use shl16insli.  */
	  rtx temp2 = create_temp_reg_if_possible (DImode, temp);
	  rtx shift_count_rtx = GEN_INT (zero_cluster_shift);

	  emit_move_insn (temp2,
			  gen_rtx_ASHIFT (DImode, temp, shift_count_rtx));

	  shift -= zero_cluster_shift;
	  temp = temp2;
	}
    }
  else
    {
      /* Set as many high 16-bit blocks as we can with a single
	 instruction.  We'll insert the remaining 16-bit blocks
	 below.  */
      for (shift = 16;; shift += 16)
	{
	  gcc_assert (shift < 64);
	  if (expand_set_cint64_one_inst (temp, val >> shift, false))
	    break;
	}
    }

  /* At this point, temp == val >> shift, shift % 16 == 0, and we
     still need to insert any bits of 'val' below 'shift'. Those bits
     are guaranteed to not have 16 contiguous zeroes.  */

  gcc_assert ((shift & 15) == 0);

  for (ins_shift = shift - 16; ins_shift >= 0; ins_shift -= 16)
    {
      rtx result;
      HOST_WIDE_INT bits = (val >> ins_shift) & 0xFFFF;
      gcc_assert (bits != 0);

      /* On the last iteration we need to store into dest_reg.  */
      if (ins_shift == 0)
	result = dest_reg;
      else
	result = create_temp_reg_if_possible (DImode, dest_reg);

      emit_insn (gen_insn_shl16insli (result, temp, GEN_INT (bits)));

      temp = result;
    }
}
1757 
1758 
1759 /* Load OP1, a 64-bit constant, into OP0, a register.  We know it
1760    can't be done in one insn when we get here, the move expander
1761    guarantees this.  */
1762 void
tilegx_expand_set_const64(rtx op0,rtx op1)1763 tilegx_expand_set_const64 (rtx op0, rtx op1)
1764 {
1765   if (CONST_INT_P (op1))
1766     {
1767       /* TODO: I don't know if we want to split large constants
1768 	 now, or wait until later (with a define_split).
1769 
1770 	 Does splitting early help CSE?  Does it harm other
1771 	 optimizations that might fold loads?  */
1772       expand_set_cint64 (op0, op1);
1773     }
1774   else
1775     {
1776       rtx temp = create_temp_reg_if_possible (Pmode, op0);
1777 
1778       if (TARGET_32BIT)
1779 	{
1780 	  /* Generate the 2-insn sequence to materialize a symbolic
1781 	     address.  */
1782 	  emit_insn (gen_mov_address_32bit_step1 (temp, op1));
1783 	  emit_insn (gen_mov_address_32bit_step2 (op0, temp, op1));
1784 	}
1785       else
1786 	{
1787 	  /* Generate the 3-insn sequence to materialize a symbolic
1788 	     address.  Note that this assumes that virtual addresses
1789 	     fit in 48 signed bits, which is currently true.  */
1790 	  rtx temp2 = create_temp_reg_if_possible (Pmode, op0);
1791 	  emit_insn (gen_mov_address_step1 (temp, op1));
1792 	  emit_insn (gen_mov_address_step2 (temp2, temp, op1));
1793 	  emit_insn (gen_mov_address_step3 (op0, temp2, op1));
1794 	}
1795     }
1796 }
1797 
1798 
/* Expand a move instruction.  Return true if all work is done, or
   false if the caller should emit the move itself, possibly with
   OPERANDS rewritten in place.  MODE is the mode of the move;
   OPERANDS[0] is the destination, OPERANDS[1] the source.  */
bool
tilegx_expand_mov (machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (MEM_P (operands[0]))
    {
      if (can_create_pseudo_p ())
	operands[0] = validize_mem (operands[0]);

      if (reg_or_0_operand (operands[1], mode))
	return false;

      /* Anything else being stored must first be forced into a
	 register (not possible during reload).  */
      if (!reload_in_progress)
	operands[1] = force_reg (mode, operands[1]);
    }

  /* Fixup TLS cases.  */
  if (CONSTANT_P (operands[1]) && tilegx_tls_referenced_p (operands[1]))
    {
      operands[1] = tilegx_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fixup PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (tilegx_pic_address_needs_scratch (operands[1]))
	operands[1] = tilegx_legitimize_pic_address (operands[1], mode, 0);

      if (symbolic_operand (operands[1], mode))
	{
	  /* During reload no new pseudos may be created, so reuse the
	     destination as the scratch register.  */
	  operands[1] = tilegx_legitimize_pic_address (operands[1],
						       mode,
						       (reload_in_progress ?
							operands[0] :
							NULL_RTX));
	  return false;
	}
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1]) || move_operand (operands[1], mode))
    return false;

  /* Split large integers.  */
  tilegx_expand_set_const64 (operands[0], operands[1]);
  return true;
}
1848 
1849 
/* Expand unaligned loads.  Load BITSIZE bits, starting BIT_OFFSET
   bits into MEM, into DEST_REG, sign-extending when SIGN is true and
   zero-extending otherwise.  Uses a two-byte fast path where
   possible, otherwise two aligned doubleword loads combined with
   dblalign.  */
void
tilegx_expand_unaligned_load (rtx dest_reg, rtx mem, HOST_WIDE_INT bitsize,
			      HOST_WIDE_INT bit_offset, bool sign)
{
  machine_mode mode;
  rtx addr_lo, addr_hi;
  rtx mem_lo, mem_hi, hi;
  rtx mema, wide_result;
  int last_byte_offset;
  HOST_WIDE_INT byte_offset = bit_offset / BITS_PER_UNIT;

  mode = GET_MODE (dest_reg);

  if (bitsize == 2 * BITS_PER_UNIT && (bit_offset % BITS_PER_UNIT) == 0)
    {
      rtx mem_left, mem_right;
      rtx left = gen_reg_rtx (mode);

      /* When just loading a two byte value, we can load the two bytes
	 individually and combine them efficiently.  */

      mem_lo = adjust_address (mem, QImode, byte_offset);
      mem_hi = adjust_address (mem, QImode, byte_offset + 1);

      /* "left" is the byte that ends up in the high bits of the
	 result, which depends on endianness.  */
      if (BYTES_BIG_ENDIAN)
	{
	  mem_left = mem_lo;
	  mem_right = mem_hi;
	}
      else
	{
	  mem_left = mem_hi;
	  mem_right = mem_lo;
	}

      if (sign)
	{
	  /* Do a signed load of the second byte and use bfins to set
	     the high bits of the result.  */
	  emit_insn (gen_zero_extendqidi2 (gen_lowpart (DImode, dest_reg),
					   mem_right));
	  emit_insn (gen_extendqidi2 (gen_lowpart (DImode, left), mem_left));
	  emit_insn (gen_insv (gen_lowpart (DImode, dest_reg),
			       GEN_INT (64 - 8), GEN_INT (8),
			       gen_lowpart (DImode, left)));
	}
      else
	{
	  /* Do two unsigned loads and use v1int_l to interleave
	     them.  */
	  rtx right = gen_reg_rtx (mode);
	  emit_insn (gen_zero_extendqidi2 (gen_lowpart (DImode, right),
					   mem_right));
	  emit_insn (gen_zero_extendqidi2 (gen_lowpart (DImode, left),
					   mem_left));
	  emit_insn (gen_insn_v1int_l (gen_lowpart (DImode, dest_reg),
				       gen_lowpart (DImode, left),
				       gen_lowpart (DImode, right)));
	}

      return;
    }

  mema = XEXP (mem, 0);

  /* AND addresses cannot be in any alias set, since they may
     implicitly alias surrounding code.  Ideally we'd have some alias
     set that covered all types except those with alignment 8 or
     higher.  */
  addr_lo = force_reg (Pmode, plus_constant (Pmode, mema, byte_offset));
  mem_lo = change_address (mem, mode,
			   gen_rtx_AND (GET_MODE (mema), addr_lo,
					GEN_INT (-8)));
  set_mem_alias_set (mem_lo, 0);

  /* Load the high word at an address that will not fault if the low
     address is aligned and at the very end of a page.  */
  last_byte_offset = (bit_offset + bitsize - 1) / BITS_PER_UNIT;
  addr_hi = force_reg (Pmode, plus_constant (Pmode, mema, last_byte_offset));
  mem_hi = change_address (mem, mode,
			   gen_rtx_AND (GET_MODE (mema), addr_hi,
					GEN_INT (-8)));
  set_mem_alias_set (mem_hi, 0);

  if (bitsize == 64)
    {
      addr_lo = make_safe_from (addr_lo, dest_reg);
      wide_result = dest_reg;
    }
  else
    {
      wide_result = gen_reg_rtx (mode);
    }

  /* Load hi first in case dest_reg is used in mema.  */
  hi = gen_reg_rtx (mode);
  emit_move_insn (hi, mem_hi);
  emit_move_insn (wide_result, mem_lo);

  /* Combine the two aligned loads; dblalign selects the bytes
     indicated by the low bits of addr_lo.  */
  emit_insn (gen_insn_dblalign (gen_lowpart (DImode, wide_result),
				gen_lowpart (DImode, wide_result),
				gen_lowpart (DImode, hi), addr_lo));

  if (bitsize != 64)
    {
      /* Narrow loads: extract just the requested field.  */
      rtx extracted =
	extract_bit_field (gen_lowpart (DImode, wide_result),
			   bitsize, bit_offset % BITS_PER_UNIT,
			   !sign, gen_lowpart (DImode, dest_reg),
			   DImode, DImode, false);

      if (extracted != dest_reg)
	emit_move_insn (dest_reg, gen_lowpart (DImode, extracted));
    }
}
1966 
1967 
1968 /* Expand unaligned stores.  */
1969 static void
tilegx_expand_unaligned_store(rtx mem,rtx src,HOST_WIDE_INT bitsize,HOST_WIDE_INT bit_offset)1970 tilegx_expand_unaligned_store (rtx mem, rtx src, HOST_WIDE_INT bitsize,
1971 			       HOST_WIDE_INT bit_offset)
1972 {
1973   HOST_WIDE_INT byte_offset = bit_offset / BITS_PER_UNIT;
1974   HOST_WIDE_INT bytesize = bitsize / BITS_PER_UNIT;
1975   HOST_WIDE_INT shift_init, shift_increment, shift_amt;
1976   HOST_WIDE_INT i;
1977   rtx mem_addr;
1978   rtx store_val;
1979 
1980   shift_init = BYTES_BIG_ENDIAN ? (bitsize - BITS_PER_UNIT) : 0;
1981   shift_increment = BYTES_BIG_ENDIAN ? -BITS_PER_UNIT : BITS_PER_UNIT;
1982 
1983   for (i = 0, shift_amt = shift_init;
1984        i < bytesize;
1985        i++, shift_amt += shift_increment)
1986     {
1987       mem_addr = adjust_address (mem, QImode, byte_offset + i);
1988 
1989       if (shift_amt)
1990 	{
1991 	  store_val = expand_simple_binop (DImode, LSHIFTRT,
1992 					   gen_lowpart (DImode, src),
1993 					   GEN_INT (shift_amt), NULL, 1,
1994 					   OPTAB_LIB_WIDEN);
1995 	  store_val = gen_lowpart (QImode, store_val);
1996 	}
1997       else
1998 	{
1999 	  store_val = gen_lowpart (QImode, src);
2000 	}
2001 
2002       emit_move_insn (mem_addr, store_val);
2003     }
2004 }
2005 
2006 
2007 /* Implement the movmisalign patterns.  One of the operands is a
2008    memory that is not naturally aligned.  Emit instructions to load
2009    it.  */
2010 void
tilegx_expand_movmisalign(machine_mode mode,rtx * operands)2011 tilegx_expand_movmisalign (machine_mode mode, rtx *operands)
2012 {
2013   if (MEM_P (operands[1]))
2014     {
2015       rtx tmp;
2016 
2017       if (register_operand (operands[0], mode))
2018 	tmp = operands[0];
2019       else
2020 	tmp = gen_reg_rtx (mode);
2021 
2022       tilegx_expand_unaligned_load (tmp, operands[1], GET_MODE_BITSIZE (mode),
2023 				    0, true);
2024 
2025       if (tmp != operands[0])
2026 	emit_move_insn (operands[0], tmp);
2027     }
2028   else if (MEM_P (operands[0]))
2029     {
2030       if (!reg_or_0_operand (operands[1], mode))
2031 	operands[1] = force_reg (mode, operands[1]);
2032 
2033       tilegx_expand_unaligned_store (operands[0], operands[1],
2034 				     GET_MODE_BITSIZE (mode), 0);
2035     }
2036   else
2037     gcc_unreachable ();
2038 
2039 }
2040 
2041 
/* Implement the allocate_stack pattern (alloca).  OP0 receives the
   address of the allocated space; OP1 is the number of bytes to
   allocate.  The word stored just above the stack pointer is
   preserved across the adjustment (see the chaining note below).  */
void
tilegx_allocate_stack (rtx op0, rtx op1)
{
  /* Technically the correct way to initialize chain_loc is with
   * gen_frame_mem() instead of gen_rtx_MEM(), but gen_frame_mem()
   * sets the alias_set to that of a frame reference.  Some of our
   * tests rely on some unsafe assumption about when the chaining
   * update is done, we need to be conservative about reordering the
   * chaining instructions.
   */
  rtx fp_addr = gen_reg_rtx (Pmode);
  rtx fp_value = gen_reg_rtx (Pmode);
  rtx fp_loc;

  /* Read the word stored one word above the current stack pointer.  */
  emit_move_insn (fp_addr, gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					 GEN_INT (UNITS_PER_WORD)));

  fp_loc = gen_frame_mem (Pmode, fp_addr);

  emit_move_insn (fp_value, fp_loc);

  /* Force the allocation size into a register so it can be
     subtracted from the stack pointer.  */
  op1 = force_reg (Pmode, op1);

  /* Grow the stack downward by OP1 bytes.  */
  emit_move_insn (stack_pointer_rtx,
		  gen_rtx_MINUS (Pmode, stack_pointer_rtx, op1));

  /* Recompute the slot address relative to the new stack pointer and
     write the saved word back there.  */
  emit_move_insn (fp_addr, gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					 GEN_INT (UNITS_PER_WORD)));

  fp_loc = gen_frame_mem (Pmode, fp_addr);

  emit_move_insn (fp_loc, fp_value);

  /* Hand back the address of the newly allocated area.  */
  emit_move_insn (op0, virtual_stack_dynamic_rtx);
}
2078 
2079 
2080 
2081 /* Multiplies */
2082 
2083 
/* Returns the insn_code in ENTRY.  Opcodes are stored in the
   sequence table in compressed form; expand through the decode
   table.  */
static enum insn_code
tilegx_multiply_get_opcode (const struct tilegx_multiply_insn_seq_entry
			    *entry)
{
  return tilegx_multiply_insn_seq_decode_opcode[entry->compressed_opcode];
}
2091 
2092 
2093 /* Returns the length of the 'op' array.  */
2094 static int
tilegx_multiply_get_num_ops(const struct tilegx_multiply_insn_seq * seq)2095 tilegx_multiply_get_num_ops (const struct tilegx_multiply_insn_seq *seq)
2096 {
2097   /* The array either uses all of its allocated slots or is terminated
2098      by a bogus opcode. Either way, the array size is the index of the
2099      last valid opcode plus one.  */
2100   int i;
2101   for (i = tilegx_multiply_insn_seq_MAX_OPERATIONS - 1; i >= 0; i--)
2102     if (tilegx_multiply_get_opcode (&seq->op[i]) != CODE_FOR_nothing)
2103       return i + 1;
2104 
2105   /* An empty array is not allowed.  */
2106   gcc_unreachable ();
2107 }
2108 
2109 
/* We precompute a number of expression trees for multiplying by
   constants.  This generates code for such an expression tree by
   walking through the nodes in the tree (which are conveniently
   pre-linearized) and emitting an instruction for each one.  RESULT
   receives the product, SRC is the value being multiplied, and SEQ
   is the precomputed instruction sequence.  */
static void
tilegx_expand_constant_multiply_given_sequence (rtx result, rtx src,
						const struct
						tilegx_multiply_insn_seq *seq)
{
  int i;
  int num_ops;

  /* Keep track of the subexpressions computed so far, so later
     instructions can refer to them.  We seed the array with zero and
     the value being multiplied.  */
  int num_subexprs = 2;
  rtx subexprs[tilegx_multiply_insn_seq_MAX_OPERATIONS + 2];
  subexprs[0] = const0_rtx;
  subexprs[1] = src;

  /* Determine how many instructions we are going to generate.  */
  num_ops = tilegx_multiply_get_num_ops (seq);
  gcc_assert (num_ops > 0
	      && num_ops <= tilegx_multiply_insn_seq_MAX_OPERATIONS);

  for (i = 0; i < num_ops; i++)
    {
      const struct tilegx_multiply_insn_seq_entry *entry = &seq->op[i];

      /* Figure out where to store the output of this instruction:
	 the last one writes RESULT directly, the rest go into fresh
	 pseudos.  */
      const bool is_last_op = (i + 1 == num_ops);
      rtx out = is_last_op ? result : gen_reg_rtx (DImode);

      enum insn_code opcode = tilegx_multiply_get_opcode (entry);
      if (opcode == CODE_FOR_ashldi3)
	{
	  /* Handle shift by immediate. This is a special case because
	     the meaning of the second operand is a constant shift
	     count rather than an operand index.  */

	  /* Make sure the shift count is in range. Zero should not
	     happen.  */
	  const int shift_count = entry->rhs;
	  gcc_assert (shift_count > 0 && shift_count < 64);

	  /* Emit the actual instruction.  */
	  emit_insn (GEN_FCN (opcode)
		     (out, subexprs[entry->lhs],
		      gen_rtx_CONST_INT (DImode, shift_count)));
	}
      else
	{
	  /* Handle a normal two-operand instruction, such as add or
	     shl1add.  Both operands index previously computed
	     subexpressions.  */

	  /* Make sure we are referring to a previously computed
	     subexpression.  */
	  gcc_assert (entry->rhs < num_subexprs);

	  /* Emit the actual instruction.  */
	  emit_insn (GEN_FCN (opcode)
		     (out, subexprs[entry->lhs], subexprs[entry->rhs]));
	}

      /* Record this subexpression for use by later expressions.  */
      subexprs[num_subexprs++] = out;
    }
}
2178 
2179 
2180 /* bsearch helper function.  */
2181 static int
tilegx_compare_multipliers(const void * key,const void * t)2182 tilegx_compare_multipliers (const void *key, const void *t)
2183 {
2184   long long delta =
2185     (*(const long long *) key
2186      - ((const struct tilegx_multiply_insn_seq *) t)->multiplier);
2187   return (delta < 0) ? -1 : (delta > 0);
2188 }
2189 
2190 
2191 /* Returns the tilegx_multiply_insn_seq for multiplier, or NULL if none
2192    exists.  */
2193 static const struct tilegx_multiply_insn_seq *
tilegx_find_multiply_insn_seq_for_constant(long long multiplier)2194 tilegx_find_multiply_insn_seq_for_constant (long long multiplier)
2195 {
2196   return ((const struct tilegx_multiply_insn_seq *)
2197 	  bsearch (&multiplier, tilegx_multiply_insn_seq_table,
2198 		   tilegx_multiply_insn_seq_table_size,
2199 		   sizeof tilegx_multiply_insn_seq_table[0],
2200 		   tilegx_compare_multipliers));
2201 }
2202 
2203 
2204 /* Try to a expand constant multiply in DImode by looking it up in a
2205    precompiled table.  OP0 is the result operand, OP1 is the source
2206    operand, and MULTIPLIER is the value of the constant.  Return true
2207    if it succeeds.  */
2208 static bool
tilegx_expand_const_muldi(rtx op0,rtx op1,long long multiplier)2209 tilegx_expand_const_muldi (rtx op0, rtx op1, long long multiplier)
2210 {
2211   /* See if we have precomputed an efficient way to multiply by this
2212      constant.  */
2213   const struct tilegx_multiply_insn_seq *seq =
2214     tilegx_find_multiply_insn_seq_for_constant (multiplier);
2215   if (seq != NULL)
2216     {
2217       tilegx_expand_constant_multiply_given_sequence (op0, op1, seq);
2218       return true;
2219     }
2220   else
2221     return false;
2222 }
2223 
2224 
2225 /* Expand the muldi pattern.  */
2226 bool
tilegx_expand_muldi(rtx op0,rtx op1,rtx op2)2227 tilegx_expand_muldi (rtx op0, rtx op1, rtx op2)
2228 {
2229   if (CONST_INT_P (op2))
2230     {
2231       HOST_WIDE_INT n = trunc_int_for_mode (INTVAL (op2), DImode);
2232       return tilegx_expand_const_muldi (op0, op1, n);
2233     }
2234   return false;
2235 }
2236 
2237 
/* Expand a high multiply pattern in DImode.  RESULT, OP1, OP2 are the
   operands, and SIGN is true if it's a signed multiply, and false if
   it's an unsigned multiply.  The high 64 bits of the 128-bit product
   are assembled from four 32x32 partial products plus carry
   corrections.  */
static void
tilegx_expand_high_multiply (rtx result, rtx op1, rtx op2, bool sign)
{
  /* Temporaries for the partial products and carry bookkeeping.  */
  rtx tmp0 = gen_reg_rtx (DImode);
  rtx tmp1 = gen_reg_rtx (DImode);
  rtx tmp2 = gen_reg_rtx (DImode);
  rtx tmp3 = gen_reg_rtx (DImode);
  rtx tmp4 = gen_reg_rtx (DImode);
  rtx tmp5 = gen_reg_rtx (DImode);
  rtx tmp6 = gen_reg_rtx (DImode);
  rtx tmp7 = gen_reg_rtx (DImode);
  rtx tmp8 = gen_reg_rtx (DImode);
  rtx tmp9 = gen_reg_rtx (DImode);
  rtx tmp10 = gen_reg_rtx (DImode);
  rtx tmp11 = gen_reg_rtx (DImode);
  rtx tmp12 = gen_reg_rtx (DImode);
  rtx tmp13 = gen_reg_rtx (DImode);
  rtx result_lo = gen_reg_rtx (DImode);

  /* Emit the four partial products, using signedness-appropriate
     multiply instructions: hi*lo, lo*hi (cross products), lo*lo, and
     hi*hi.  */
  if (sign)
    {
      emit_insn (gen_insn_mul_hs_lu (tmp0, op1, op2));
      emit_insn (gen_insn_mul_hs_lu (tmp1, op2, op1));
      emit_insn (gen_insn_mul_lu_lu (tmp2, op1, op2));
      emit_insn (gen_insn_mul_hs_hs (tmp3, op1, op2));
    }
  else
    {
      emit_insn (gen_insn_mul_hu_lu (tmp0, op1, op2));
      emit_insn (gen_insn_mul_hu_lu (tmp1, op2, op1));
      emit_insn (gen_insn_mul_lu_lu (tmp2, op1, op2));
      emit_insn (gen_insn_mul_hu_hu (tmp3, op1, op2));
    }

  /* Shift the cross products into position and add them plus the
     low product to form the low 64 bits of the full product.  */
  emit_move_insn (tmp4, (gen_rtx_ASHIFT (DImode, tmp0, GEN_INT (32))));

  emit_move_insn (tmp5, (gen_rtx_ASHIFT (DImode, tmp1, GEN_INT (32))));

  emit_move_insn (tmp6, (gen_rtx_PLUS (DImode, tmp4, tmp5)));
  emit_move_insn (result_lo, (gen_rtx_PLUS (DImode, tmp2, tmp6)));

  /* Detect the carries out of the two additions above: an unsigned
     sum wrapped exactly when it compares below an addend.  */
  emit_move_insn (tmp7, gen_rtx_LTU (DImode, tmp6, tmp4));
  emit_move_insn (tmp8, gen_rtx_LTU (DImode, result_lo, tmp2));

  /* High 32 bits of each cross product; an arithmetic shift keeps
     the sign in the signed case.  */
  if (sign)
    {
      emit_move_insn (tmp9, (gen_rtx_ASHIFTRT (DImode, tmp0, GEN_INT (32))));
      emit_move_insn (tmp10, (gen_rtx_ASHIFTRT (DImode, tmp1, GEN_INT (32))));
    }
  else
    {
      emit_move_insn (tmp9, (gen_rtx_LSHIFTRT (DImode, tmp0, GEN_INT (32))));
      emit_move_insn (tmp10, (gen_rtx_LSHIFTRT (DImode, tmp1, GEN_INT (32))));
    }

  /* Accumulate the hi*hi product, both carries, and the high halves
     of the cross products into the final high word.  */
  emit_move_insn (tmp11, (gen_rtx_PLUS (DImode, tmp3, tmp7)));
  emit_move_insn (tmp12, (gen_rtx_PLUS (DImode, tmp8, tmp9)));
  emit_move_insn (tmp13, (gen_rtx_PLUS (DImode, tmp11, tmp12)));
  emit_move_insn (result, (gen_rtx_PLUS (DImode, tmp13, tmp10)));
}
2301 
2302 
/* Implement smuldi3_highpart: OP0 = high 64 bits of the signed
   128-bit product OP1 * OP2.  */
void
tilegx_expand_smuldi3_highpart (rtx op0, rtx op1, rtx op2)
{
  tilegx_expand_high_multiply (op0, op1, op2, true);
}
2309 
2310 
/* Implement umuldi3_highpart: OP0 = high 64 bits of the unsigned
   128-bit product OP1 * OP2.  */
void
tilegx_expand_umuldi3_highpart (rtx op0, rtx op1, rtx op2)
{
  tilegx_expand_high_multiply (op0, op1, op2, false);
}
2317 
2318 
2319 
2320 /* Compare and branches  */
2321 
2322 /* Produce the rtx yielding a bool for a floating point
2323    comparison.  */
2324 static bool
tilegx_emit_fp_setcc(rtx res,enum rtx_code code,machine_mode mode,rtx op0,rtx op1)2325 tilegx_emit_fp_setcc (rtx res, enum rtx_code code, machine_mode mode,
2326 		      rtx op0, rtx op1)
2327 {
2328   /* TODO: Certain compares again constants can be done using entirely
2329      integer operations. But you have to get the special cases right
2330      e.g. NaN, +0 == -0, etc.  */
2331 
2332   rtx flags;
2333   int flag_index;
2334   rtx a = force_reg (DImode, gen_lowpart (DImode, op0));
2335   rtx b = force_reg (DImode, gen_lowpart (DImode, op1));
2336 
2337   flags = gen_reg_rtx (DImode);
2338 
2339   if (mode == SFmode)
2340     {
2341       emit_insn (gen_insn_fsingle_add1 (flags, a, b));
2342     }
2343   else
2344     {
2345       gcc_assert (mode == DFmode);
2346       emit_insn (gen_insn_fdouble_add_flags (flags, a, b));
2347     }
2348 
2349   switch (code)
2350     {
2351     case EQ: flag_index = 30; break;
2352     case NE: flag_index = 31; break;
2353     case LE: flag_index = 27; break;
2354     case LT: flag_index = 26; break;
2355     case GE: flag_index = 29; break;
2356     case GT: flag_index = 28; break;
2357     default: gcc_unreachable ();
2358     }
2359 
2360   gcc_assert (GET_MODE (res) == DImode);
2361   emit_move_insn (res, gen_rtx_ZERO_EXTRACT (DImode, flags, GEN_INT (1),
2362 					     GEN_INT (flag_index)));
2363   return true;
2364 }
2365 
2366 
2367 /* Certain simplifications can be done to make invalid setcc
2368    operations valid.  Return the final comparison, or NULL if we can't
2369    work.  */
2370 static bool
tilegx_emit_setcc_internal(rtx res,enum rtx_code code,rtx op0,rtx op1,machine_mode cmp_mode)2371 tilegx_emit_setcc_internal (rtx res, enum rtx_code code, rtx op0, rtx op1,
2372 			    machine_mode cmp_mode)
2373 {
2374   rtx tmp;
2375   bool swap = false;
2376 
2377   if (cmp_mode == SFmode || cmp_mode == DFmode)
2378     return tilegx_emit_fp_setcc (res, code, cmp_mode, op0, op1);
2379 
2380   /* The general case: fold the comparison code to the types of
2381      compares that we have, choosing the branch as necessary.  */
2382 
2383   switch (code)
2384     {
2385     case EQ:
2386     case NE:
2387     case LE:
2388     case LT:
2389     case LEU:
2390     case LTU:
2391       /* We have these compares.  */
2392       break;
2393 
2394     case GE:
2395     case GT:
2396     case GEU:
2397     case GTU:
2398       /* We do not have these compares, so we reverse the
2399 	 operands.  */
2400       swap = true;
2401       break;
2402 
2403     default:
2404       /* We should not have called this with any other code.  */
2405       gcc_unreachable ();
2406     }
2407 
2408   if (swap)
2409     {
2410       code = swap_condition (code);
2411       tmp = op0, op0 = op1, op1 = tmp;
2412     }
2413 
2414   if (!reg_or_0_operand (op0, cmp_mode))
2415     op0 = force_reg (cmp_mode, op0);
2416 
2417   if (!CONST_INT_P (op1) && !register_operand (op1, cmp_mode))
2418     op1 = force_reg (cmp_mode, op1);
2419 
2420   /* Return the setcc comparison.  */
2421   emit_insn (gen_rtx_SET (res, gen_rtx_fmt_ee (code, DImode, op0, op1)));
2422 
2423   return true;
2424 }
2425 
2426 
/* Implement cstore patterns.  OPERANDS[0] receives the boolean
   result, OPERANDS[1] is the comparison whose code is used, and
   OPERANDS[2]/OPERANDS[3] are the values being compared in
   CMP_MODE.  */
bool
tilegx_emit_setcc (rtx operands[], machine_mode cmp_mode)
{
  return
    tilegx_emit_setcc_internal (operands[0], GET_CODE (operands[1]),
				operands[2], operands[3], cmp_mode);
}
2435 
2436 
2437 /* Return whether CODE is a signed comparison.  */
2438 static bool
signed_compare_p(enum rtx_code code)2439 signed_compare_p (enum rtx_code code)
2440 {
2441   return (code == EQ || code == NE || code == LT || code == LE
2442 	  || code == GT || code == GE);
2443 }
2444 
2445 
/* Generate the comparison for a DImode conditional branch.  Returns
   the rtx to branch on.  CODE/OP0/OP1 describe the comparison in
   CMP_MODE; when EQ_NE_ONLY is set, only EQ/NE compares against zero
   may be used directly (other codes go through a setcc).  */
static rtx
tilegx_emit_cc_test (enum rtx_code code, rtx op0, rtx op1,
		     machine_mode cmp_mode, bool eq_ne_only)
{
  enum rtx_code branch_code;
  rtx temp;

  if (cmp_mode == SFmode || cmp_mode == DFmode)
    {
      /* Compute a boolean saying whether the comparison is true.  */
      temp = gen_reg_rtx (DImode);
      tilegx_emit_setcc_internal (temp, code, op0, op1, cmp_mode);

      /* Test that flag.  */
      return gen_rtx_fmt_ee (NE, VOIDmode, temp, const0_rtx);
    }

  /* Check for a compare against zero using a comparison we can do
     directly.  */
  if (op1 == const0_rtx
      && (code == EQ || code == NE
	  || (!eq_ne_only && signed_compare_p (code))))
    {
      op0 = force_reg (cmp_mode, op0);
      return gen_rtx_fmt_ee (code, VOIDmode, op0, const0_rtx);
    }

  /* The general case: fold the comparison code to the types of
     compares that we have, choosing the branch as necessary.  */
  switch (code)
    {
    case EQ:
    case LE:
    case LT:
    case LEU:
    case LTU:
      /* We have these compares.  */
      branch_code = NE;
      break;

    case NE:
    case GE:
    case GT:
    case GEU:
    case GTU:
      /* These must be reversed (except NE, but let's
	 canonicalize).  */
      code = reverse_condition (code);
      branch_code = EQ;
      break;

    default:
      gcc_unreachable ();
    }

  /* Try cheaper expansions for compares against constants that do
     not fit the immediate constraint (or LEU, which has no direct
     immediate form).  */
  if (CONST_INT_P (op1) && (!satisfies_constraint_I (op1) || code == LEU))
    {
      HOST_WIDE_INT n = INTVAL (op1);

      switch (code)
	{
	case EQ:
	  /* Subtract off the value we want to compare against and see
	     if we get zero.  This is cheaper than creating a constant
	     in a register. Except that subtracting -128 is more
	     expensive than seqi to -128, so we leave that alone.  */
	  /* ??? Don't do this when comparing against symbols,
	     otherwise we'll reduce (&x == 0x1234) to (&x-0x1234 ==
	     0), which will be declared false out of hand (at least
	     for non-weak).  */
	  if (n != -128
	      && add_operand (GEN_INT (-n), DImode)
	      && !(symbolic_operand (op0, VOIDmode)
		   || (REG_P (op0) && REG_POINTER (op0))))
	    {
	      /* TODO: Use a SIMD add immediate to hit zero for tiled
		 constants in a single instruction.  */
	      if (GET_MODE (op0) != DImode)
		{
		  /* Convert to DImode so we can use addli.  Note that
		     this will not actually generate any code because
		     sign extension from SI -> DI is a no-op.  I don't
		     know if it's safe just to make a paradoxical
		     subreg here though.  */
		  rtx temp2 = gen_reg_rtx (DImode);
		  emit_insn (gen_extendsidi2 (temp2, op0));
		  op0 = temp2;
		}
	      else
		{
		  op0 = force_reg (DImode, op0);
		}
	      temp = gen_reg_rtx (DImode);
	      emit_move_insn (temp, gen_rtx_PLUS (DImode, op0, GEN_INT (-n)));
	      return gen_rtx_fmt_ee (reverse_condition (branch_code),
				     VOIDmode, temp, const0_rtx);
	    }
	  break;

	case LEU:
	  if (n == -1)
	    break;
	  /* FALLTHRU */

	case LTU:
	  /* Change ((unsigned)x < 0x1000) into !((int)x >> 12), etc.
	     We use arithmetic shift right because it's a 3-wide op,
	     while logical shift right is not.  */
	  {
	    int first = exact_log2 (code == LTU ? n : n + 1);
	    if (first != -1)
	      {
		op0 = force_reg (cmp_mode, op0);
		temp = gen_reg_rtx (cmp_mode);
		emit_move_insn (temp,
				gen_rtx_ASHIFTRT (cmp_mode, op0,
						  GEN_INT (first)));
		return gen_rtx_fmt_ee (reverse_condition (branch_code),
				       VOIDmode, temp, const0_rtx);
	      }
	  }
	  break;

	default:
	  break;
	}
    }

  /* Compute a flag saying whether we should branch.  */
  temp = gen_reg_rtx (DImode);
  tilegx_emit_setcc_internal (temp, code, op0, op1, cmp_mode);

  /* Return the branch comparison.  */
  return gen_rtx_fmt_ee (branch_code, VOIDmode, temp, const0_rtx);
}
2582 
2583 
2584 /* Generate the comparison for a conditional branch.  */
2585 void
tilegx_emit_conditional_branch(rtx operands[],machine_mode cmp_mode)2586 tilegx_emit_conditional_branch (rtx operands[], machine_mode cmp_mode)
2587 {
2588   rtx cmp_rtx =
2589     tilegx_emit_cc_test (GET_CODE (operands[0]), operands[1], operands[2],
2590 			 cmp_mode, false);
2591   rtx branch_rtx = gen_rtx_SET (pc_rtx,
2592 				gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
2593 						      gen_rtx_LABEL_REF
2594 						      (VOIDmode,
2595 						       operands[3]),
2596 						      pc_rtx));
2597   emit_jump_insn (branch_rtx);
2598 }
2599 
2600 
/* Implement the mov<mode>cc pattern.  CMP is the comparison rtx from
   the pattern; returns the condition rtx to use, restricted to
   EQ/NE-against-zero forms (eq_ne_only is passed as true).  */
rtx
tilegx_emit_conditional_move (rtx cmp)
{
  return
    tilegx_emit_cc_test (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1),
			 GET_MODE (XEXP (cmp, 0)), true);
}
2609 
2610 
2611 /* Return true if INSN is annotated with a REG_BR_PROB note that
2612    indicates it's a branch that's predicted taken.  */
2613 static bool
cbranch_predicted_p(rtx_insn * insn)2614 cbranch_predicted_p (rtx_insn *insn)
2615 {
2616   rtx x = find_reg_note (insn, REG_BR_PROB, 0);
2617 
2618   if (x)
2619     {
2620       int pred_val = XINT (x, 0);
2621 
2622       return pred_val >= REG_BR_PROB_BASE / 2;
2623     }
2624 
2625   return false;
2626 }
2627 
2628 
2629 /* Output assembly code for a specific branch instruction, appending
2630    the branch prediction flag to the opcode if appropriate.  */
2631 static const char *
tilegx_output_simple_cbranch_with_opcode(rtx_insn * insn,const char * opcode,int regop,bool reverse_predicted)2632 tilegx_output_simple_cbranch_with_opcode (rtx_insn *insn, const char *opcode,
2633 					  int regop, bool reverse_predicted)
2634 {
2635   static char buf[64];
2636   sprintf (buf, "%s%s\t%%r%d, %%l0", opcode,
2637 	   (cbranch_predicted_p (insn) ^ reverse_predicted) ? "t" : "",
2638 	   regop);
2639   return buf;
2640 }
2641 
2642 
/* Output assembly code for a specific branch instruction, appending
   the branch prediction flag to the opcode if appropriate.  OPCODE is
   used for the direct branch, REV_OPCODE for the reversed fallback
   when the target is out of direct-branch range; REGOP is the operand
   number of the tested register.  */
const char *
tilegx_output_cbranch_with_opcode (rtx_insn *insn, rtx *operands,
				   const char *opcode,
				   const char *rev_opcode, int regop)
{
  const char *branch_if_false;
  rtx taken, not_taken;
  bool is_simple_branch;

  gcc_assert (LABEL_P (operands[0]));

  /* A direct conditional branch only reaches a limited displacement;
     if insn addresses are known, check whether the target is in
     range.  */
  is_simple_branch = true;
  if (INSN_ADDRESSES_SET_P ())
    {
      int from_addr = INSN_ADDRESSES (INSN_UID (insn));
      int to_addr = INSN_ADDRESSES (INSN_UID (operands[0]));
      int delta = to_addr - from_addr;
      is_simple_branch = IN_RANGE (delta, -524288, 524280);
    }

  if (is_simple_branch)
    {
      /* Just a simple conditional branch.  */
      return
	tilegx_output_simple_cbranch_with_opcode (insn, opcode, regop, false);
    }

  /* Generate a reversed branch around a direct jump.  This fallback
     does not use branch-likely instructions.  */
  not_taken = gen_label_rtx ();
  taken = operands[0];

  /* Generate the reversed branch to NOT_TAKEN.  */
  operands[0] = not_taken;
  branch_if_false =
    tilegx_output_simple_cbranch_with_opcode (insn, rev_opcode, regop, true);
  output_asm_insn (branch_if_false, operands);

  /* Unconditional jump to the real target.  */
  output_asm_insn ("j\t%l0", &taken);

  /* Output NOT_TAKEN.  */
  targetm.asm_out.internal_label (asm_out_file, "L",
				  CODE_LABEL_NUMBER (not_taken));
  return "";
}
2690 
2691 
2692 /* Output assembly code for a conditional branch instruction.  */
2693 const char *
tilegx_output_cbranch(rtx_insn * insn,rtx * operands,bool reversed)2694 tilegx_output_cbranch (rtx_insn *insn, rtx *operands, bool reversed)
2695 {
2696   enum rtx_code code = GET_CODE (operands[1]);
2697   const char *opcode;
2698   const char *rev_opcode;
2699 
2700   if (reversed)
2701     code = reverse_condition (code);
2702 
2703   switch (code)
2704     {
2705     case NE:
2706       opcode = "bnez";
2707       rev_opcode = "beqz";
2708       break;
2709     case EQ:
2710       opcode = "beqz";
2711       rev_opcode = "bnez";
2712       break;
2713     case GE:
2714       opcode = "bgez";
2715       rev_opcode = "bltz";
2716       break;
2717     case GT:
2718       opcode = "bgtz";
2719       rev_opcode = "blez";
2720       break;
2721     case LE:
2722       opcode = "blez";
2723       rev_opcode = "bgtz";
2724       break;
2725     case LT:
2726       opcode = "bltz";
2727       rev_opcode = "bgez";
2728       break;
2729     default:
2730       gcc_unreachable ();
2731     }
2732 
2733   return tilegx_output_cbranch_with_opcode (insn, operands, opcode,
2734 					    rev_opcode, 2);
2735 }
2736 
2737 
/* Implement the tablejump pattern.  OP0 is the value loaded from the
   jump table; OP1 is the table's label.  */
void
tilegx_expand_tablejump (rtx op0, rtx op1)
{
  if (flag_pic)
    {
      /* Under PIC, adjust the loaded table entry by the pc-relative
	 address of the table label to form the jump target.  */
      rtx temp = gen_reg_rtx (Pmode);
      rtx temp2 = gen_reg_rtx (Pmode);

      tilegx_compute_pcrel_address (temp, gen_rtx_LABEL_REF (Pmode, op1));
      emit_move_insn (temp2,
		      gen_rtx_PLUS (Pmode,
				    convert_to_mode (Pmode, op0, false),
				    temp));
      op0 = temp2;
    }

  emit_jump_insn (gen_tablejump_aux (op0, op1));
}
2757 
2758 
/* Emit barrier before an atomic, as needed for the memory MODEL.
   need_atomic_barrier_p (with pre=true) decides whether MODEL
   requires a fence before the operation.  */
void
tilegx_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}
2766 
2767 
/* Emit barrier after an atomic, as needed for the memory MODEL.
   need_atomic_barrier_p (with pre=false) decides whether MODEL
   requires a fence after the operation.  */
void
tilegx_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
2775 
2776 
2777 
2778 /* Expand a builtin vector binary op, by calling gen function GEN with
2779    operands in the proper modes.  DEST is converted to DEST_MODE, and
2780    src0 and src1 (if DO_SRC1 is true) is converted to SRC_MODE.  */
2781 void
tilegx_expand_builtin_vector_binop(rtx (* gen)(rtx,rtx,rtx),machine_mode dest_mode,rtx dest,machine_mode src_mode,rtx src0,rtx src1,bool do_src1)2782 tilegx_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
2783 				    machine_mode dest_mode,
2784 				    rtx dest,
2785 				    machine_mode src_mode,
2786 				    rtx src0, rtx src1, bool do_src1)
2787 {
2788   dest = gen_lowpart (dest_mode, dest);
2789 
2790   if (src0 == const0_rtx)
2791     src0 = CONST0_RTX (src_mode);
2792   else
2793     src0 = gen_lowpart (src_mode, src0);
2794 
2795   if (do_src1)
2796     {
2797       if (src1 == const0_rtx)
2798 	src1 = CONST0_RTX (src_mode);
2799       else
2800 	src1 = gen_lowpart (src_mode, src1);
2801     }
2802 
2803   emit_insn ((*gen) (dest, src0, src1));
2804 }
2805 
2806 
2807 
2808 /* Intrinsics  */
2809 
2810 
/* Descriptor pairing an insn_code used to expand a builtin with its
   function decl.  */
struct tile_builtin_info
{
  enum insn_code icode;  /* Pattern used to expand the builtin.  */
  tree fndecl;           /* Builtin decl; NULL in the static table below.  */
};
2816 
2817 static struct tile_builtin_info tilegx_builtin_info[TILEGX_BUILTIN_max] = {
2818   { CODE_FOR_adddi3,                    NULL }, /* add */
2819   { CODE_FOR_addsi3,                    NULL }, /* addx */
2820   { CODE_FOR_ssaddsi3,                  NULL }, /* addxsc */
2821   { CODE_FOR_anddi3,                    NULL }, /* and */
2822   { CODE_FOR_insn_bfexts,               NULL }, /* bfexts */
2823   { CODE_FOR_insn_bfextu,               NULL }, /* bfextu */
2824   { CODE_FOR_insn_bfins,                NULL }, /* bfins */
2825   { CODE_FOR_clzdi2,                    NULL }, /* clz */
2826   { CODE_FOR_insn_cmoveqz,              NULL }, /* cmoveqz */
2827   { CODE_FOR_insn_cmovnez,              NULL }, /* cmovnez */
2828   { CODE_FOR_insn_cmpeq_didi,           NULL }, /* cmpeq */
2829   { CODE_FOR_insn_cmpexch,              NULL }, /* cmpexch */
2830   { CODE_FOR_insn_cmpexch4,             NULL }, /* cmpexch4 */
2831   { CODE_FOR_insn_cmples_didi,          NULL }, /* cmples */
2832   { CODE_FOR_insn_cmpleu_didi,          NULL }, /* cmpleu */
2833   { CODE_FOR_insn_cmplts_didi,          NULL }, /* cmplts */
2834   { CODE_FOR_insn_cmpltu_didi,          NULL }, /* cmpltu */
2835   { CODE_FOR_insn_cmpne_didi,           NULL }, /* cmpne */
2836   { CODE_FOR_insn_cmul,                 NULL }, /* cmul */
2837   { CODE_FOR_insn_cmula,                NULL }, /* cmula */
2838   { CODE_FOR_insn_cmulaf,               NULL }, /* cmulaf */
2839   { CODE_FOR_insn_cmulf,                NULL }, /* cmulf */
2840   { CODE_FOR_insn_cmulfr,               NULL }, /* cmulfr */
2841   { CODE_FOR_insn_cmulh,                NULL }, /* cmulh */
2842   { CODE_FOR_insn_cmulhr,               NULL }, /* cmulhr */
2843   { CODE_FOR_insn_crc32_32,             NULL }, /* crc32_32 */
2844   { CODE_FOR_insn_crc32_8,              NULL }, /* crc32_8 */
2845   { CODE_FOR_ctzdi2,                    NULL }, /* ctz */
2846   { CODE_FOR_insn_dblalign,             NULL }, /* dblalign */
2847   { CODE_FOR_insn_dblalign2,            NULL }, /* dblalign2 */
2848   { CODE_FOR_insn_dblalign4,            NULL }, /* dblalign4 */
2849   { CODE_FOR_insn_dblalign6,            NULL }, /* dblalign6 */
2850   { CODE_FOR_insn_drain,                NULL }, /* drain */
2851   { CODE_FOR_insn_dtlbpr,               NULL }, /* dtlbpr */
2852   { CODE_FOR_insn_exch,                 NULL }, /* exch */
2853   { CODE_FOR_insn_exch4,                NULL }, /* exch4 */
2854   { CODE_FOR_insn_fdouble_add_flags,    NULL }, /* fdouble_add_flags */
2855   { CODE_FOR_insn_fdouble_addsub,       NULL }, /* fdouble_addsub */
2856   { CODE_FOR_insn_fdouble_mul_flags,    NULL }, /* fdouble_mul_flags */
2857   { CODE_FOR_insn_fdouble_pack1,        NULL }, /* fdouble_pack1 */
2858   { CODE_FOR_insn_fdouble_pack2,        NULL }, /* fdouble_pack2 */
2859   { CODE_FOR_insn_fdouble_sub_flags,    NULL }, /* fdouble_sub_flags */
2860   { CODE_FOR_insn_fdouble_unpack_max,   NULL }, /* fdouble_unpack_max */
2861   { CODE_FOR_insn_fdouble_unpack_min,   NULL }, /* fdouble_unpack_min */
2862   { CODE_FOR_insn_fetchadd,             NULL }, /* fetchadd */
2863   { CODE_FOR_insn_fetchadd4,            NULL }, /* fetchadd4 */
2864   { CODE_FOR_insn_fetchaddgez,          NULL }, /* fetchaddgez */
2865   { CODE_FOR_insn_fetchaddgez4,         NULL }, /* fetchaddgez4 */
2866   { CODE_FOR_insn_fetchand,             NULL }, /* fetchand */
2867   { CODE_FOR_insn_fetchand4,            NULL }, /* fetchand4 */
2868   { CODE_FOR_insn_fetchor,              NULL }, /* fetchor */
2869   { CODE_FOR_insn_fetchor4,             NULL }, /* fetchor4 */
2870   { CODE_FOR_insn_finv,                 NULL }, /* finv */
2871   { CODE_FOR_insn_flush,                NULL }, /* flush */
2872   { CODE_FOR_insn_flushwb,              NULL }, /* flushwb */
2873   { CODE_FOR_insn_fnop,                 NULL }, /* fnop */
2874   { CODE_FOR_insn_fsingle_add1,         NULL }, /* fsingle_add1 */
2875   { CODE_FOR_insn_fsingle_addsub2,      NULL }, /* fsingle_addsub2 */
2876   { CODE_FOR_insn_fsingle_mul1,         NULL }, /* fsingle_mul1 */
2877   { CODE_FOR_insn_fsingle_mul2,         NULL }, /* fsingle_mul2 */
2878   { CODE_FOR_insn_fsingle_pack1,        NULL }, /* fsingle_pack1 */
2879   { CODE_FOR_insn_fsingle_pack2,        NULL }, /* fsingle_pack2 */
2880   { CODE_FOR_insn_fsingle_sub1,         NULL }, /* fsingle_sub1 */
2881   { CODE_FOR_insn_icoh,                 NULL }, /* icoh */
2882   { CODE_FOR_insn_ill,                  NULL }, /* ill */
2883   { CODE_FOR_insn_info,                 NULL }, /* info */
2884   { CODE_FOR_insn_infol,                NULL }, /* infol */
2885   { CODE_FOR_insn_inv,                  NULL }, /* inv */
2886   { CODE_FOR_insn_ld,                   NULL }, /* ld */
2887   { CODE_FOR_insn_ld1s,                 NULL }, /* ld1s */
2888   { CODE_FOR_insn_ld1u,                 NULL }, /* ld1u */
2889   { CODE_FOR_insn_ld2s,                 NULL }, /* ld2s */
2890   { CODE_FOR_insn_ld2u,                 NULL }, /* ld2u */
2891   { CODE_FOR_insn_ld4s,                 NULL }, /* ld4s */
2892   { CODE_FOR_insn_ld4u,                 NULL }, /* ld4u */
2893   { CODE_FOR_insn_ldna,                 NULL }, /* ldna */
2894   { CODE_FOR_insn_ldnt,                 NULL }, /* ldnt */
2895   { CODE_FOR_insn_ldnt1s,               NULL }, /* ldnt1s */
2896   { CODE_FOR_insn_ldnt1u,               NULL }, /* ldnt1u */
2897   { CODE_FOR_insn_ldnt2s,               NULL }, /* ldnt2s */
2898   { CODE_FOR_insn_ldnt2u,               NULL }, /* ldnt2u */
2899   { CODE_FOR_insn_ldnt4s,               NULL }, /* ldnt4s */
2900   { CODE_FOR_insn_ldnt4u,               NULL }, /* ldnt4u */
2901   { CODE_FOR_insn_ld_L2,                NULL }, /* ld_L2 */
2902   { CODE_FOR_insn_ld1s_L2,              NULL }, /* ld1s_L2 */
2903   { CODE_FOR_insn_ld1u_L2,              NULL }, /* ld1u_L2 */
2904   { CODE_FOR_insn_ld2s_L2,              NULL }, /* ld2s_L2 */
2905   { CODE_FOR_insn_ld2u_L2,              NULL }, /* ld2u_L2 */
2906   { CODE_FOR_insn_ld4s_L2,              NULL }, /* ld4s_L2 */
2907   { CODE_FOR_insn_ld4u_L2,              NULL }, /* ld4u_L2 */
2908   { CODE_FOR_insn_ldna_L2,              NULL }, /* ldna_L2 */
2909   { CODE_FOR_insn_ldnt_L2,              NULL }, /* ldnt_L2 */
2910   { CODE_FOR_insn_ldnt1s_L2,            NULL }, /* ldnt1s_L2 */
2911   { CODE_FOR_insn_ldnt1u_L2,            NULL }, /* ldnt1u_L2 */
2912   { CODE_FOR_insn_ldnt2s_L2,            NULL }, /* ldnt2s_L2 */
2913   { CODE_FOR_insn_ldnt2u_L2,            NULL }, /* ldnt2u_L2 */
2914   { CODE_FOR_insn_ldnt4s_L2,            NULL }, /* ldnt4s_L2 */
2915   { CODE_FOR_insn_ldnt4u_L2,            NULL }, /* ldnt4u_L2 */
2916   { CODE_FOR_insn_ld_miss,              NULL }, /* ld_miss */
2917   { CODE_FOR_insn_ld1s_miss,            NULL }, /* ld1s_miss */
2918   { CODE_FOR_insn_ld1u_miss,            NULL }, /* ld1u_miss */
2919   { CODE_FOR_insn_ld2s_miss,            NULL }, /* ld2s_miss */
2920   { CODE_FOR_insn_ld2u_miss,            NULL }, /* ld2u_miss */
2921   { CODE_FOR_insn_ld4s_miss,            NULL }, /* ld4s_miss */
2922   { CODE_FOR_insn_ld4u_miss,            NULL }, /* ld4u_miss */
2923   { CODE_FOR_insn_ldna_miss,            NULL }, /* ldna_miss */
2924   { CODE_FOR_insn_ldnt_miss,            NULL }, /* ldnt_miss */
2925   { CODE_FOR_insn_ldnt1s_miss,          NULL }, /* ldnt1s_miss */
2926   { CODE_FOR_insn_ldnt1u_miss,          NULL }, /* ldnt1u_miss */
2927   { CODE_FOR_insn_ldnt2s_miss,          NULL }, /* ldnt2s_miss */
2928   { CODE_FOR_insn_ldnt2u_miss,          NULL }, /* ldnt2u_miss */
2929   { CODE_FOR_insn_ldnt4s_miss,          NULL }, /* ldnt4s_miss */
2930   { CODE_FOR_insn_ldnt4u_miss,          NULL }, /* ldnt4u_miss */
2931   { CODE_FOR_insn_lnk,                  NULL }, /* lnk */
2932   { CODE_FOR_memory_barrier,            NULL }, /* mf */
2933   { CODE_FOR_insn_mfspr,                NULL }, /* mfspr */
2934   { CODE_FOR_insn_mm,                   NULL }, /* mm */
2935   { CODE_FOR_insn_mnz,                  NULL }, /* mnz */
2936   { CODE_FOR_movdi,                     NULL }, /* move */
2937   { CODE_FOR_insn_mtspr,                NULL }, /* mtspr */
2938   { CODE_FOR_insn_mul_hs_hs,            NULL }, /* mul_hs_hs */
2939   { CODE_FOR_insn_mul_hs_hu,            NULL }, /* mul_hs_hu */
2940   { CODE_FOR_insn_mul_hs_ls,            NULL }, /* mul_hs_ls */
2941   { CODE_FOR_insn_mul_hs_lu,            NULL }, /* mul_hs_lu */
2942   { CODE_FOR_insn_mul_hu_hu,            NULL }, /* mul_hu_hu */
2943   { CODE_FOR_insn_mul_hu_ls,            NULL }, /* mul_hu_ls */
2944   { CODE_FOR_insn_mul_hu_lu,            NULL }, /* mul_hu_lu */
2945   { CODE_FOR_insn_mul_ls_ls,            NULL }, /* mul_ls_ls */
2946   { CODE_FOR_insn_mul_ls_lu,            NULL }, /* mul_ls_lu */
2947   { CODE_FOR_insn_mul_lu_lu,            NULL }, /* mul_lu_lu */
2948   { CODE_FOR_insn_mula_hs_hs,           NULL }, /* mula_hs_hs */
2949   { CODE_FOR_insn_mula_hs_hu,           NULL }, /* mula_hs_hu */
2950   { CODE_FOR_insn_mula_hs_ls,           NULL }, /* mula_hs_ls */
2951   { CODE_FOR_insn_mula_hs_lu,           NULL }, /* mula_hs_lu */
2952   { CODE_FOR_insn_mula_hu_hu,           NULL }, /* mula_hu_hu */
2953   { CODE_FOR_insn_mula_hu_ls,           NULL }, /* mula_hu_ls */
2954   { CODE_FOR_insn_mula_hu_lu,           NULL }, /* mula_hu_lu */
2955   { CODE_FOR_insn_mula_ls_ls,           NULL }, /* mula_ls_ls */
2956   { CODE_FOR_insn_mula_ls_lu,           NULL }, /* mula_ls_lu */
2957   { CODE_FOR_insn_mula_lu_lu,           NULL }, /* mula_lu_lu */
2958   { CODE_FOR_insn_mulax,                NULL }, /* mulax */
2959   { CODE_FOR_mulsi3,                    NULL }, /* mulx */
2960   { CODE_FOR_insn_mz,                   NULL }, /* mz */
2961   { CODE_FOR_insn_nap,                  NULL }, /* nap */
2962   { CODE_FOR_nop,                       NULL }, /* nop */
2963   { CODE_FOR_insn_nor_di,               NULL }, /* nor */
2964   { CODE_FOR_iordi3,                    NULL }, /* or */
2965   { CODE_FOR_popcountdi2,               NULL }, /* pcnt */
2966   { CODE_FOR_insn_prefetch_l1,          NULL }, /* prefetch_l1 */
2967   { CODE_FOR_insn_prefetch_l1_fault,    NULL }, /* prefetch_l1_fault */
2968   { CODE_FOR_insn_prefetch_l2,          NULL }, /* prefetch_l2 */
2969   { CODE_FOR_insn_prefetch_l2_fault,    NULL }, /* prefetch_l2_fault */
2970   { CODE_FOR_insn_prefetch_l3,          NULL }, /* prefetch_l3 */
2971   { CODE_FOR_insn_prefetch_l3_fault,    NULL }, /* prefetch_l3_fault */
2972   { CODE_FOR_insn_revbits,              NULL }, /* revbits */
2973   { CODE_FOR_bswapdi2,                  NULL }, /* revbytes */
2974   { CODE_FOR_rotldi3,                   NULL }, /* rotl */
2975   { CODE_FOR_ashldi3,                   NULL }, /* shl */
2976   { CODE_FOR_insn_shl16insli,           NULL }, /* shl16insli */
2977   { CODE_FOR_insn_shl1add,              NULL }, /* shl1add */
2978   { CODE_FOR_insn_shl1addx,             NULL }, /* shl1addx */
2979   { CODE_FOR_insn_shl2add,              NULL }, /* shl2add */
2980   { CODE_FOR_insn_shl2addx,             NULL }, /* shl2addx */
2981   { CODE_FOR_insn_shl3add,              NULL }, /* shl3add */
2982   { CODE_FOR_insn_shl3addx,             NULL }, /* shl3addx */
2983   { CODE_FOR_ashlsi3,                   NULL }, /* shlx */
2984   { CODE_FOR_ashrdi3,                   NULL }, /* shrs */
2985   { CODE_FOR_lshrdi3,                   NULL }, /* shru */
2986   { CODE_FOR_lshrsi3,                   NULL }, /* shrux */
2987   { CODE_FOR_insn_shufflebytes,         NULL }, /* shufflebytes */
2988   { CODE_FOR_insn_shufflebytes1,        NULL }, /* shufflebytes1 */
2989   { CODE_FOR_insn_st,                   NULL }, /* st */
2990   { CODE_FOR_insn_st1,                  NULL }, /* st1 */
2991   { CODE_FOR_insn_st2,                  NULL }, /* st2 */
2992   { CODE_FOR_insn_st4,                  NULL }, /* st4 */
2993   { CODE_FOR_insn_stnt,                 NULL }, /* stnt */
2994   { CODE_FOR_insn_stnt1,                NULL }, /* stnt1 */
2995   { CODE_FOR_insn_stnt2,                NULL }, /* stnt2 */
2996   { CODE_FOR_insn_stnt4,                NULL }, /* stnt4 */
2997   { CODE_FOR_subdi3,                    NULL }, /* sub */
2998   { CODE_FOR_subsi3,                    NULL }, /* subx */
2999   { CODE_FOR_sssubsi3,                  NULL }, /* subxsc */
3000   { CODE_FOR_insn_tblidxb0,             NULL }, /* tblidxb0 */
3001   { CODE_FOR_insn_tblidxb1,             NULL }, /* tblidxb1 */
3002   { CODE_FOR_insn_tblidxb2,             NULL }, /* tblidxb2 */
3003   { CODE_FOR_insn_tblidxb3,             NULL }, /* tblidxb3 */
3004   { CODE_FOR_insn_v1add,                NULL }, /* v1add */
3005   { CODE_FOR_insn_v1addi,               NULL }, /* v1addi */
3006   { CODE_FOR_insn_v1adduc,              NULL }, /* v1adduc */
3007   { CODE_FOR_insn_v1adiffu,             NULL }, /* v1adiffu */
3008   { CODE_FOR_insn_v1avgu,               NULL }, /* v1avgu */
3009   { CODE_FOR_insn_v1cmpeq,              NULL }, /* v1cmpeq */
3010   { CODE_FOR_insn_v1cmpeqi,             NULL }, /* v1cmpeqi */
3011   { CODE_FOR_insn_v1cmples,             NULL }, /* v1cmples */
3012   { CODE_FOR_insn_v1cmpleu,             NULL }, /* v1cmpleu */
3013   { CODE_FOR_insn_v1cmplts,             NULL }, /* v1cmplts */
3014   { CODE_FOR_insn_v1cmpltsi,            NULL }, /* v1cmpltsi */
3015   { CODE_FOR_insn_v1cmpltu,             NULL }, /* v1cmpltu */
3016   { CODE_FOR_insn_v1cmpltui,            NULL }, /* v1cmpltui */
3017   { CODE_FOR_insn_v1cmpne,              NULL }, /* v1cmpne */
3018   { CODE_FOR_insn_v1ddotpu,             NULL }, /* v1ddotpu */
3019   { CODE_FOR_insn_v1ddotpua,            NULL }, /* v1ddotpua */
3020   { CODE_FOR_insn_v1ddotpus,            NULL }, /* v1ddotpus */
3021   { CODE_FOR_insn_v1ddotpusa,           NULL }, /* v1ddotpusa */
3022   { CODE_FOR_insn_v1dotp,               NULL }, /* v1dotp */
3023   { CODE_FOR_insn_v1dotpa,              NULL }, /* v1dotpa */
3024   { CODE_FOR_insn_v1dotpu,              NULL }, /* v1dotpu */
3025   { CODE_FOR_insn_v1dotpua,             NULL }, /* v1dotpua */
3026   { CODE_FOR_insn_v1dotpus,             NULL }, /* v1dotpus */
3027   { CODE_FOR_insn_v1dotpusa,            NULL }, /* v1dotpusa */
3028   { CODE_FOR_insn_v1int_h,              NULL }, /* v1int_h */
3029   { CODE_FOR_insn_v1int_l,              NULL }, /* v1int_l */
3030   { CODE_FOR_insn_v1maxu,               NULL }, /* v1maxu */
3031   { CODE_FOR_insn_v1maxui,              NULL }, /* v1maxui */
3032   { CODE_FOR_insn_v1minu,               NULL }, /* v1minu */
3033   { CODE_FOR_insn_v1minui,              NULL }, /* v1minui */
3034   { CODE_FOR_insn_v1mnz,                NULL }, /* v1mnz */
3035   { CODE_FOR_insn_v1multu,              NULL }, /* v1multu */
3036   { CODE_FOR_insn_v1mulu,               NULL }, /* v1mulu */
3037   { CODE_FOR_insn_v1mulus,              NULL }, /* v1mulus */
3038   { CODE_FOR_insn_v1mz,                 NULL }, /* v1mz */
3039   { CODE_FOR_insn_v1sadau,              NULL }, /* v1sadau */
3040   { CODE_FOR_insn_v1sadu,               NULL }, /* v1sadu */
3041   { CODE_FOR_insn_v1shl,                NULL }, /* v1shl */
3042   { CODE_FOR_insn_v1shl,                NULL }, /* v1shli */
3043   { CODE_FOR_insn_v1shrs,               NULL }, /* v1shrs */
3044   { CODE_FOR_insn_v1shrs,               NULL }, /* v1shrsi */
3045   { CODE_FOR_insn_v1shru,               NULL }, /* v1shru */
3046   { CODE_FOR_insn_v1shru,               NULL }, /* v1shrui */
3047   { CODE_FOR_insn_v1sub,                NULL }, /* v1sub */
3048   { CODE_FOR_insn_v1subuc,              NULL }, /* v1subuc */
3049   { CODE_FOR_insn_v2add,                NULL }, /* v2add */
3050   { CODE_FOR_insn_v2addi,               NULL }, /* v2addi */
3051   { CODE_FOR_insn_v2addsc,              NULL }, /* v2addsc */
3052   { CODE_FOR_insn_v2adiffs,             NULL }, /* v2adiffs */
3053   { CODE_FOR_insn_v2avgs,               NULL }, /* v2avgs */
3054   { CODE_FOR_insn_v2cmpeq,              NULL }, /* v2cmpeq */
3055   { CODE_FOR_insn_v2cmpeqi,             NULL }, /* v2cmpeqi */
3056   { CODE_FOR_insn_v2cmples,             NULL }, /* v2cmples */
3057   { CODE_FOR_insn_v2cmpleu,             NULL }, /* v2cmpleu */
3058   { CODE_FOR_insn_v2cmplts,             NULL }, /* v2cmplts */
3059   { CODE_FOR_insn_v2cmpltsi,            NULL }, /* v2cmpltsi */
3060   { CODE_FOR_insn_v2cmpltu,             NULL }, /* v2cmpltu */
3061   { CODE_FOR_insn_v2cmpltui,            NULL }, /* v2cmpltui */
3062   { CODE_FOR_insn_v2cmpne,              NULL }, /* v2cmpne */
3063   { CODE_FOR_insn_v2dotp,               NULL }, /* v2dotp */
3064   { CODE_FOR_insn_v2dotpa,              NULL }, /* v2dotpa */
3065   { CODE_FOR_insn_v2int_h,              NULL }, /* v2int_h */
3066   { CODE_FOR_insn_v2int_l,              NULL }, /* v2int_l */
3067   { CODE_FOR_insn_v2maxs,               NULL }, /* v2maxs */
3068   { CODE_FOR_insn_v2maxsi,              NULL }, /* v2maxsi */
3069   { CODE_FOR_insn_v2mins,               NULL }, /* v2mins */
3070   { CODE_FOR_insn_v2minsi,              NULL }, /* v2minsi */
3071   { CODE_FOR_insn_v2mnz,                NULL }, /* v2mnz */
3072   { CODE_FOR_insn_v2mulfsc,             NULL }, /* v2mulfsc */
3073   { CODE_FOR_insn_v2muls,               NULL }, /* v2muls */
3074   { CODE_FOR_insn_v2mults,              NULL }, /* v2mults */
3075   { CODE_FOR_insn_v2mz,                 NULL }, /* v2mz */
3076   { CODE_FOR_insn_v2packh,              NULL }, /* v2packh */
3077   { CODE_FOR_insn_v2packl,              NULL }, /* v2packl */
3078   { CODE_FOR_insn_v2packuc,             NULL }, /* v2packuc */
3079   { CODE_FOR_insn_v2sadas,              NULL }, /* v2sadas */
3080   { CODE_FOR_insn_v2sadau,              NULL }, /* v2sadau */
3081   { CODE_FOR_insn_v2sads,               NULL }, /* v2sads */
3082   { CODE_FOR_insn_v2sadu,               NULL }, /* v2sadu */
3083   { CODE_FOR_insn_v2shl,                NULL }, /* v2shl */
3084   { CODE_FOR_insn_v2shl,                NULL }, /* v2shli */
3085   { CODE_FOR_insn_v2shlsc,              NULL }, /* v2shlsc */
3086   { CODE_FOR_insn_v2shrs,               NULL }, /* v2shrs */
3087   { CODE_FOR_insn_v2shrs,               NULL }, /* v2shrsi */
3088   { CODE_FOR_insn_v2shru,               NULL }, /* v2shru */
3089   { CODE_FOR_insn_v2shru,               NULL }, /* v2shrui */
3090   { CODE_FOR_insn_v2sub,                NULL }, /* v2sub */
3091   { CODE_FOR_insn_v2subsc,              NULL }, /* v2subsc */
3092   { CODE_FOR_insn_v4add,                NULL }, /* v4add */
3093   { CODE_FOR_insn_v4addsc,              NULL }, /* v4addsc */
3094   { CODE_FOR_insn_v4int_h,              NULL }, /* v4int_h */
3095   { CODE_FOR_insn_v4int_l,              NULL }, /* v4int_l */
3096   { CODE_FOR_insn_v4packsc,             NULL }, /* v4packsc */
3097   { CODE_FOR_insn_v4shl,                NULL }, /* v4shl */
3098   { CODE_FOR_insn_v4shlsc,              NULL }, /* v4shlsc */
3099   { CODE_FOR_insn_v4shrs,               NULL }, /* v4shrs */
3100   { CODE_FOR_insn_v4shru,               NULL }, /* v4shru */
3101   { CODE_FOR_insn_v4sub,                NULL }, /* v4sub */
3102   { CODE_FOR_insn_v4subsc,              NULL }, /* v4subsc */
3103   { CODE_FOR_insn_wh64,                 NULL }, /* wh64 */
3104   { CODE_FOR_xordi3,                    NULL }, /* xor */
3105   { CODE_FOR_tilegx_network_barrier,    NULL }, /* network_barrier */
3106   { CODE_FOR_tilegx_idn0_receive,       NULL }, /* idn0_receive */
3107   { CODE_FOR_tilegx_idn1_receive,       NULL }, /* idn1_receive */
3108   { CODE_FOR_tilegx_idn_send,           NULL }, /* idn_send */
3109   { CODE_FOR_tilegx_udn0_receive,       NULL }, /* udn0_receive */
3110   { CODE_FOR_tilegx_udn1_receive,       NULL }, /* udn1_receive */
3111   { CODE_FOR_tilegx_udn2_receive,       NULL }, /* udn2_receive */
3112   { CODE_FOR_tilegx_udn3_receive,       NULL }, /* udn3_receive */
3113   { CODE_FOR_tilegx_udn_send,           NULL }, /* udn_send */
3114 };
3115 
3116 
/* Descriptor for one TILE-Gx builtin: associates the user-visible
   builtin name with its identifier and prototype signature.  */
struct tilegx_builtin_def
{
  /* User-visible name of the builtin, e.g. "__insn_add".  */
  const char *name;
  /* Identifier for this builtin (enum tilegx_builtin; presumably
     declared in tilegx-builtins.h, included above).  Note that several
     names may share one code (e.g. "__insn_add"/"__insn_addi").  */
  enum tilegx_builtin code;
  /* Whether the builtin is a pure function of its arguments.
     NOTE(review): presumably used to mark the decl const/readonly
     when it is registered — confirm against the decl-creation code.  */
  bool is_const;
  /* The first character is the return type.  Subsequent characters
     are the argument types. See char_to_type.  */
  const char *type;
};
3126 
3127 
3128 static const struct tilegx_builtin_def tilegx_builtins[] = {
3129   { "__insn_add",                TILEGX_INSN_ADD,                true,  "lll"  },
3130   { "__insn_addi",               TILEGX_INSN_ADD,                true,  "lll"  },
3131   { "__insn_addli",              TILEGX_INSN_ADD,                true,  "lll"  },
3132   { "__insn_addx",               TILEGX_INSN_ADDX,               true,  "iii"  },
3133   { "__insn_addxi",              TILEGX_INSN_ADDX,               true,  "iii"  },
3134   { "__insn_addxli",             TILEGX_INSN_ADDX,               true,  "iii"  },
3135   { "__insn_addxsc",             TILEGX_INSN_ADDXSC,             true,  "iii"  },
3136   { "__insn_and",                TILEGX_INSN_AND,                true,  "lll"  },
3137   { "__insn_andi",               TILEGX_INSN_AND,                true,  "lll"  },
3138   { "__insn_bfexts",             TILEGX_INSN_BFEXTS,             true,  "llll" },
3139   { "__insn_bfextu",             TILEGX_INSN_BFEXTU,             true,  "llll" },
3140   { "__insn_bfins",              TILEGX_INSN_BFINS,              true,  "lllll"},
3141   { "__insn_clz",                TILEGX_INSN_CLZ,                true,  "ll"   },
3142   { "__insn_cmoveqz",            TILEGX_INSN_CMOVEQZ,            true,  "llll" },
3143   { "__insn_cmovnez",            TILEGX_INSN_CMOVNEZ,            true,  "llll" },
3144   { "__insn_cmpeq",              TILEGX_INSN_CMPEQ,              true,  "lll"  },
3145   { "__insn_cmpeqi",             TILEGX_INSN_CMPEQ,              true,  "lll"  },
3146   { "__insn_cmpexch",            TILEGX_INSN_CMPEXCH,            false, "lpl"  },
3147   { "__insn_cmpexch4",           TILEGX_INSN_CMPEXCH4,           false, "ipi"  },
3148   { "__insn_cmples",             TILEGX_INSN_CMPLES,             true,  "lll"  },
3149   { "__insn_cmpleu",             TILEGX_INSN_CMPLEU,             true,  "lll"  },
3150   { "__insn_cmplts",             TILEGX_INSN_CMPLTS,             true,  "lll"  },
3151   { "__insn_cmpltsi",            TILEGX_INSN_CMPLTS,             true,  "lll"  },
3152   { "__insn_cmpltu",             TILEGX_INSN_CMPLTU,             true,  "lll"  },
3153   { "__insn_cmpltui",            TILEGX_INSN_CMPLTU,             true,  "lll"  },
3154   { "__insn_cmpne",              TILEGX_INSN_CMPNE,              true,  "lll"  },
3155   { "__insn_cmul",               TILEGX_INSN_CMUL,               true,  "lll"  },
3156   { "__insn_cmula",              TILEGX_INSN_CMULA,              true,  "llll" },
3157   { "__insn_cmulaf",             TILEGX_INSN_CMULAF,             true,  "llll" },
3158   { "__insn_cmulf",              TILEGX_INSN_CMULF,              true,  "lll"  },
3159   { "__insn_cmulfr",             TILEGX_INSN_CMULFR,             true,  "lll"  },
3160   { "__insn_cmulh",              TILEGX_INSN_CMULH,              true,  "lll"  },
3161   { "__insn_cmulhr",             TILEGX_INSN_CMULHR,             true,  "lll"  },
3162   { "__insn_crc32_32",           TILEGX_INSN_CRC32_32,           true,  "lll"  },
3163   { "__insn_crc32_8",            TILEGX_INSN_CRC32_8,            true,  "lll"  },
3164   { "__insn_ctz",                TILEGX_INSN_CTZ,                true,  "ll"   },
3165   { "__insn_dblalign",           TILEGX_INSN_DBLALIGN,           true,  "lllk" },
3166   { "__insn_dblalign2",          TILEGX_INSN_DBLALIGN2,          true,  "lll"  },
3167   { "__insn_dblalign4",          TILEGX_INSN_DBLALIGN4,          true,  "lll"  },
3168   { "__insn_dblalign6",          TILEGX_INSN_DBLALIGN6,          true,  "lll"  },
3169   { "__insn_drain",              TILEGX_INSN_DRAIN,              false, "v"    },
3170   { "__insn_dtlbpr",             TILEGX_INSN_DTLBPR,             false, "vl"   },
3171   { "__insn_exch",               TILEGX_INSN_EXCH,               false, "lpl"  },
3172   { "__insn_exch4",              TILEGX_INSN_EXCH4,              false, "ipi"  },
3173   { "__insn_fdouble_add_flags",  TILEGX_INSN_FDOUBLE_ADD_FLAGS,  true,  "lll"  },
3174   { "__insn_fdouble_addsub",     TILEGX_INSN_FDOUBLE_ADDSUB,     true,  "llll" },
3175   { "__insn_fdouble_mul_flags",  TILEGX_INSN_FDOUBLE_MUL_FLAGS,  true,  "lll"  },
3176   { "__insn_fdouble_pack1",      TILEGX_INSN_FDOUBLE_PACK1,      true,  "lll"  },
3177   { "__insn_fdouble_pack2",      TILEGX_INSN_FDOUBLE_PACK2,      true,  "llll" },
3178   { "__insn_fdouble_sub_flags",  TILEGX_INSN_FDOUBLE_SUB_FLAGS,  true,  "lll"  },
3179   { "__insn_fdouble_unpack_max", TILEGX_INSN_FDOUBLE_UNPACK_MAX, true,  "lll"  },
3180   { "__insn_fdouble_unpack_min", TILEGX_INSN_FDOUBLE_UNPACK_MIN, true,  "lll"  },
3181   { "__insn_fetchadd",           TILEGX_INSN_FETCHADD,           false, "lpl"  },
3182   { "__insn_fetchadd4",          TILEGX_INSN_FETCHADD4,          false, "ipi"  },
3183   { "__insn_fetchaddgez",        TILEGX_INSN_FETCHADDGEZ,        false, "lpl"  },
3184   { "__insn_fetchaddgez4",       TILEGX_INSN_FETCHADDGEZ4,       false, "ipi"  },
3185   { "__insn_fetchand",           TILEGX_INSN_FETCHAND,           false, "lpl"  },
3186   { "__insn_fetchand4",          TILEGX_INSN_FETCHAND4,          false, "ipi"  },
3187   { "__insn_fetchor",            TILEGX_INSN_FETCHOR,            false, "lpl"  },
3188   { "__insn_fetchor4",           TILEGX_INSN_FETCHOR4,           false, "ipi"  },
3189   { "__insn_finv",               TILEGX_INSN_FINV,               false, "vk"   },
3190   { "__insn_flush",              TILEGX_INSN_FLUSH,              false, "vk"   },
3191   { "__insn_flushwb",            TILEGX_INSN_FLUSHWB,            false, "v"    },
3192   { "__insn_fnop",               TILEGX_INSN_FNOP,               false, "v"    },
3193   { "__insn_fsingle_add1",       TILEGX_INSN_FSINGLE_ADD1,       true,  "lll"  },
3194   { "__insn_fsingle_addsub2",    TILEGX_INSN_FSINGLE_ADDSUB2,    true,  "llll" },
3195   { "__insn_fsingle_mul1",       TILEGX_INSN_FSINGLE_MUL1,       true,  "lll"  },
3196   { "__insn_fsingle_mul2",       TILEGX_INSN_FSINGLE_MUL2,       true,  "lll"  },
3197   { "__insn_fsingle_pack1",      TILEGX_INSN_FSINGLE_PACK1,      true,  "ll"   },
3198   { "__insn_fsingle_pack2",      TILEGX_INSN_FSINGLE_PACK2,      true,  "lll"  },
3199   { "__insn_fsingle_sub1",       TILEGX_INSN_FSINGLE_SUB1,       true,  "lll"  },
3200   { "__insn_icoh",               TILEGX_INSN_ICOH,               false, "vk"   },
3201   { "__insn_ill",                TILEGX_INSN_ILL,                false, "v"    },
3202   { "__insn_info",               TILEGX_INSN_INFO,               false, "vl"   },
3203   { "__insn_infol",              TILEGX_INSN_INFOL,              false, "vl"   },
3204   { "__insn_inv",                TILEGX_INSN_INV,                false, "vp"   },
3205   { "__insn_ld",                 TILEGX_INSN_LD,                 false, "lk"   },
3206   { "__insn_ld1s",               TILEGX_INSN_LD1S,               false, "lk"   },
3207   { "__insn_ld1u",               TILEGX_INSN_LD1U,               false, "lk"   },
3208   { "__insn_ld2s",               TILEGX_INSN_LD2S,               false, "lk"   },
3209   { "__insn_ld2u",               TILEGX_INSN_LD2U,               false, "lk"   },
3210   { "__insn_ld4s",               TILEGX_INSN_LD4S,               false, "lk"   },
3211   { "__insn_ld4u",               TILEGX_INSN_LD4U,               false, "lk"   },
3212   { "__insn_ldna",               TILEGX_INSN_LDNA,               false, "lk"   },
3213   { "__insn_ldnt",               TILEGX_INSN_LDNT,               false, "lk"   },
3214   { "__insn_ldnt1s",             TILEGX_INSN_LDNT1S,             false, "lk"   },
3215   { "__insn_ldnt1u",             TILEGX_INSN_LDNT1U,             false, "lk"   },
3216   { "__insn_ldnt2s",             TILEGX_INSN_LDNT2S,             false, "lk"   },
3217   { "__insn_ldnt2u",             TILEGX_INSN_LDNT2U,             false, "lk"   },
3218   { "__insn_ldnt4s",             TILEGX_INSN_LDNT4S,             false, "lk"   },
3219   { "__insn_ldnt4u",             TILEGX_INSN_LDNT4U,             false, "lk"   },
3220   { "__insn_ld_L2",              TILEGX_INSN_LD_L2,              false, "lk"   },
3221   { "__insn_ld1s_L2",            TILEGX_INSN_LD1S_L2,            false, "lk"   },
3222   { "__insn_ld1u_L2",            TILEGX_INSN_LD1U_L2,            false, "lk"   },
3223   { "__insn_ld2s_L2",            TILEGX_INSN_LD2S_L2,            false, "lk"   },
3224   { "__insn_ld2u_L2",            TILEGX_INSN_LD2U_L2,            false, "lk"   },
3225   { "__insn_ld4s_L2",            TILEGX_INSN_LD4S_L2,            false, "lk"   },
3226   { "__insn_ld4u_L2",            TILEGX_INSN_LD4U_L2,            false, "lk"   },
3227   { "__insn_ldna_L2",            TILEGX_INSN_LDNA_L2,            false, "lk"   },
3228   { "__insn_ldnt_L2",            TILEGX_INSN_LDNT_L2,            false, "lk"   },
3229   { "__insn_ldnt1s_L2",          TILEGX_INSN_LDNT1S_L2,          false, "lk"   },
3230   { "__insn_ldnt1u_L2",          TILEGX_INSN_LDNT1U_L2,          false, "lk"   },
3231   { "__insn_ldnt2s_L2",          TILEGX_INSN_LDNT2S_L2,          false, "lk"   },
3232   { "__insn_ldnt2u_L2",          TILEGX_INSN_LDNT2U_L2,          false, "lk"   },
3233   { "__insn_ldnt4s_L2",          TILEGX_INSN_LDNT4S_L2,          false, "lk"   },
3234   { "__insn_ldnt4u_L2",          TILEGX_INSN_LDNT4U_L2,          false, "lk"   },
3235   { "__insn_ld_miss",            TILEGX_INSN_LD_MISS,            false, "lk"   },
3236   { "__insn_ld1s_miss",          TILEGX_INSN_LD1S_MISS,          false, "lk"   },
3237   { "__insn_ld1u_miss",          TILEGX_INSN_LD1U_MISS,          false, "lk"   },
3238   { "__insn_ld2s_miss",          TILEGX_INSN_LD2S_MISS,          false, "lk"   },
3239   { "__insn_ld2u_miss",          TILEGX_INSN_LD2U_MISS,          false, "lk"   },
3240   { "__insn_ld4s_miss",          TILEGX_INSN_LD4S_MISS,          false, "lk"   },
3241   { "__insn_ld4u_miss",          TILEGX_INSN_LD4U_MISS,          false, "lk"   },
3242   { "__insn_ldna_miss",          TILEGX_INSN_LDNA_MISS,          false, "lk"   },
3243   { "__insn_ldnt_miss",          TILEGX_INSN_LDNT_MISS,          false, "lk"   },
3244   { "__insn_ldnt1s_miss",        TILEGX_INSN_LDNT1S_MISS,        false, "lk"   },
3245   { "__insn_ldnt1u_miss",        TILEGX_INSN_LDNT1U_MISS,        false, "lk"   },
3246   { "__insn_ldnt2s_miss",        TILEGX_INSN_LDNT2S_MISS,        false, "lk"   },
3247   { "__insn_ldnt2u_miss",        TILEGX_INSN_LDNT2U_MISS,        false, "lk"   },
3248   { "__insn_ldnt4s_miss",        TILEGX_INSN_LDNT4S_MISS,        false, "lk"   },
3249   { "__insn_ldnt4u_miss",        TILEGX_INSN_LDNT4U_MISS,        false, "lk"   },
3250   { "__insn_lnk",                TILEGX_INSN_LNK,                true,  "l"    },
3251   { "__insn_mf",                 TILEGX_INSN_MF,                 false, "v"    },
3252   { "__insn_mfspr",              TILEGX_INSN_MFSPR,              false, "ll"   },
3253   { "__insn_mm",                 TILEGX_INSN_MM,                 true,  "lllll"},
3254   { "__insn_mnz",                TILEGX_INSN_MNZ,                true,  "lll"  },
3255   { "__insn_move",               TILEGX_INSN_MOVE,               true,  "ll"   },
3256   { "__insn_movei",              TILEGX_INSN_MOVE,               true,  "ll"   },
3257   { "__insn_moveli",             TILEGX_INSN_MOVE,               true,  "ll"   },
3258   { "__insn_mtspr",              TILEGX_INSN_MTSPR,              false, "vll"  },
3259   { "__insn_mul_hs_hs",          TILEGX_INSN_MUL_HS_HS,          true,  "lll"  },
3260   { "__insn_mul_hs_hu",          TILEGX_INSN_MUL_HS_HU,          true,  "lll"  },
3261   { "__insn_mul_hs_ls",          TILEGX_INSN_MUL_HS_LS,          true,  "lll"  },
3262   { "__insn_mul_hs_lu",          TILEGX_INSN_MUL_HS_LU,          true,  "lll"  },
3263   { "__insn_mul_hu_hu",          TILEGX_INSN_MUL_HU_HU,          true,  "lll"  },
3264   { "__insn_mul_hu_ls",          TILEGX_INSN_MUL_HU_LS,          true,  "lll"  },
3265   { "__insn_mul_hu_lu",          TILEGX_INSN_MUL_HU_LU,          true,  "lll"  },
3266   { "__insn_mul_ls_ls",          TILEGX_INSN_MUL_LS_LS,          true,  "lll"  },
3267   { "__insn_mul_ls_lu",          TILEGX_INSN_MUL_LS_LU,          true,  "lll"  },
3268   { "__insn_mul_lu_lu",          TILEGX_INSN_MUL_LU_LU,          true,  "lll"  },
3269   { "__insn_mula_hs_hs",         TILEGX_INSN_MULA_HS_HS,         true,  "llll" },
3270   { "__insn_mula_hs_hu",         TILEGX_INSN_MULA_HS_HU,         true,  "llll" },
3271   { "__insn_mula_hs_ls",         TILEGX_INSN_MULA_HS_LS,         true,  "llll" },
3272   { "__insn_mula_hs_lu",         TILEGX_INSN_MULA_HS_LU,         true,  "llll" },
3273   { "__insn_mula_hu_hu",         TILEGX_INSN_MULA_HU_HU,         true,  "llll" },
3274   { "__insn_mula_hu_ls",         TILEGX_INSN_MULA_HU_LS,         true,  "llll" },
3275   { "__insn_mula_hu_lu",         TILEGX_INSN_MULA_HU_LU,         true,  "llll" },
3276   { "__insn_mula_ls_ls",         TILEGX_INSN_MULA_LS_LS,         true,  "llll" },
3277   { "__insn_mula_ls_lu",         TILEGX_INSN_MULA_LS_LU,         true,  "llll" },
3278   { "__insn_mula_lu_lu",         TILEGX_INSN_MULA_LU_LU,         true,  "llll" },
3279   { "__insn_mulax",              TILEGX_INSN_MULAX,              true,  "iiii" },
3280   { "__insn_mulx",               TILEGX_INSN_MULX,               true,  "iii"  },
3281   { "__insn_mz",                 TILEGX_INSN_MZ,                 true,  "lll"  },
3282   { "__insn_nap",                TILEGX_INSN_NAP,                false, "v"    },
3283   { "__insn_nop",                TILEGX_INSN_NOP,                true,  "v"    },
3284   { "__insn_nor",                TILEGX_INSN_NOR,                true,  "lll"  },
3285   { "__insn_or",                 TILEGX_INSN_OR,                 true,  "lll"  },
3286   { "__insn_ori",                TILEGX_INSN_OR,                 true,  "lll"  },
3287   { "__insn_pcnt",               TILEGX_INSN_PCNT,               true,  "ll"   },
3288   { "__insn_prefetch",           TILEGX_INSN_PREFETCH_L1,        false, "vk"   },
3289   { "__insn_prefetch_l1",        TILEGX_INSN_PREFETCH_L1,        false, "vk"   },
3290   { "__insn_prefetch_l1_fault",  TILEGX_INSN_PREFETCH_L1_FAULT,  false, "vk"   },
3291   { "__insn_prefetch_l2",        TILEGX_INSN_PREFETCH_L2,        false, "vk"   },
3292   { "__insn_prefetch_l2_fault",  TILEGX_INSN_PREFETCH_L2_FAULT,  false, "vk"   },
3293   { "__insn_prefetch_l3",        TILEGX_INSN_PREFETCH_L3,        false, "vk"   },
3294   { "__insn_prefetch_l3_fault",  TILEGX_INSN_PREFETCH_L3_FAULT,  false, "vk"   },
3295   { "__insn_revbits",            TILEGX_INSN_REVBITS,            true,  "ll"   },
3296   { "__insn_revbytes",           TILEGX_INSN_REVBYTES,           true,  "ll"   },
3297   { "__insn_rotl",               TILEGX_INSN_ROTL,               true,  "lli"  },
3298   { "__insn_rotli",              TILEGX_INSN_ROTL,               true,  "lli"  },
3299   { "__insn_shl",                TILEGX_INSN_SHL,                true,  "lli"  },
3300   { "__insn_shl16insli",         TILEGX_INSN_SHL16INSLI,         true,  "lll"  },
3301   { "__insn_shl1add",            TILEGX_INSN_SHL1ADD,            true,  "lll"  },
3302   { "__insn_shl1addx",           TILEGX_INSN_SHL1ADDX,           true,  "iii"  },
3303   { "__insn_shl2add",            TILEGX_INSN_SHL2ADD,            true,  "lll"  },
3304   { "__insn_shl2addx",           TILEGX_INSN_SHL2ADDX,           true,  "iii"  },
3305   { "__insn_shl3add",            TILEGX_INSN_SHL3ADD,            true,  "lll"  },
3306   { "__insn_shl3addx",           TILEGX_INSN_SHL3ADDX,           true,  "iii"  },
3307   { "__insn_shli",               TILEGX_INSN_SHL,                true,  "lli"  },
3308   { "__insn_shlx",               TILEGX_INSN_SHLX,               true,  "iii"  },
3309   { "__insn_shlxi",              TILEGX_INSN_SHLX,               true,  "iii"  },
3310   { "__insn_shrs",               TILEGX_INSN_SHRS,               true,  "lli"  },
3311   { "__insn_shrsi",              TILEGX_INSN_SHRS,               true,  "lli"  },
3312   { "__insn_shru",               TILEGX_INSN_SHRU,               true,  "lli"  },
3313   { "__insn_shrui",              TILEGX_INSN_SHRU,               true,  "lli"  },
3314   { "__insn_shrux",              TILEGX_INSN_SHRUX,              true,  "iii"  },
3315   { "__insn_shruxi",             TILEGX_INSN_SHRUX,              true,  "iii"  },
3316   { "__insn_shufflebytes",       TILEGX_INSN_SHUFFLEBYTES,       true,  "llll" },
3317   { "__insn_shufflebytes1",      TILEGX_INSN_SHUFFLEBYTES1,      true,  "lll"  },
3318   { "__insn_st",                 TILEGX_INSN_ST,                 false, "vpl"  },
3319   { "__insn_st1",                TILEGX_INSN_ST1,                false, "vpl"  },
3320   { "__insn_st2",                TILEGX_INSN_ST2,                false, "vpl"  },
3321   { "__insn_st4",                TILEGX_INSN_ST4,                false, "vpl"  },
3322   { "__insn_stnt",               TILEGX_INSN_STNT,               false, "vpl"  },
3323   { "__insn_stnt1",              TILEGX_INSN_STNT1,              false, "vpl"  },
3324   { "__insn_stnt2",              TILEGX_INSN_STNT2,              false, "vpl"  },
3325   { "__insn_stnt4",              TILEGX_INSN_STNT4,              false, "vpl"  },
3326   { "__insn_sub",                TILEGX_INSN_SUB,                true,  "lll"  },
3327   { "__insn_subx",               TILEGX_INSN_SUBX,               true,  "iii"  },
3328   { "__insn_subxsc",             TILEGX_INSN_SUBXSC,             true,  "iii"  },
3329   { "__insn_tblidxb0",           TILEGX_INSN_TBLIDXB0,           true,  "lll"  },
3330   { "__insn_tblidxb1",           TILEGX_INSN_TBLIDXB1,           true,  "lll"  },
3331   { "__insn_tblidxb2",           TILEGX_INSN_TBLIDXB2,           true,  "lll"  },
3332   { "__insn_tblidxb3",           TILEGX_INSN_TBLIDXB3,           true,  "lll"  },
3333   { "__insn_v1add",              TILEGX_INSN_V1ADD,              true,  "lll"  },
3334   { "__insn_v1addi",             TILEGX_INSN_V1ADDI,             true,  "lll"  },
3335   { "__insn_v1adduc",            TILEGX_INSN_V1ADDUC,            true,  "lll"  },
3336   { "__insn_v1adiffu",           TILEGX_INSN_V1ADIFFU,           true,  "lll"  },
3337   { "__insn_v1avgu",             TILEGX_INSN_V1AVGU,             true,  "lll"  },
3338   { "__insn_v1cmpeq",            TILEGX_INSN_V1CMPEQ,            true,  "lll"  },
3339   { "__insn_v1cmpeqi",           TILEGX_INSN_V1CMPEQI,           true,  "lll"  },
3340   { "__insn_v1cmples",           TILEGX_INSN_V1CMPLES,           true,  "lll"  },
3341   { "__insn_v1cmpleu",           TILEGX_INSN_V1CMPLEU,           true,  "lll"  },
3342   { "__insn_v1cmplts",           TILEGX_INSN_V1CMPLTS,           true,  "lll"  },
3343   { "__insn_v1cmpltsi",          TILEGX_INSN_V1CMPLTSI,          true,  "lll"  },
3344   { "__insn_v1cmpltu",           TILEGX_INSN_V1CMPLTU,           true,  "lll"  },
3345   { "__insn_v1cmpltui",          TILEGX_INSN_V1CMPLTUI,          true,  "lll"  },
3346   { "__insn_v1cmpne",            TILEGX_INSN_V1CMPNE,            true,  "lll"  },
3347   { "__insn_v1ddotpu",           TILEGX_INSN_V1DDOTPU,           true,  "lll"  },
3348   { "__insn_v1ddotpua",          TILEGX_INSN_V1DDOTPUA,          true,  "llll" },
3349   { "__insn_v1ddotpus",          TILEGX_INSN_V1DDOTPUS,          true,  "lll"  },
3350   { "__insn_v1ddotpusa",         TILEGX_INSN_V1DDOTPUSA,         true,  "llll" },
3351   { "__insn_v1dotp",             TILEGX_INSN_V1DOTP,             true,  "lll"  },
3352   { "__insn_v1dotpa",            TILEGX_INSN_V1DOTPA,            true,  "llll" },
3353   { "__insn_v1dotpu",            TILEGX_INSN_V1DOTPU,            true,  "lll"  },
3354   { "__insn_v1dotpua",           TILEGX_INSN_V1DOTPUA,           true,  "llll" },
3355   { "__insn_v1dotpus",           TILEGX_INSN_V1DOTPUS,           true,  "lll"  },
3356   { "__insn_v1dotpusa",          TILEGX_INSN_V1DOTPUSA,          true,  "llll" },
3357   { "__insn_v1int_h",            TILEGX_INSN_V1INT_H,            true,  "lll"  },
3358   { "__insn_v1int_l",            TILEGX_INSN_V1INT_L,            true,  "lll"  },
3359   { "__insn_v1maxu",             TILEGX_INSN_V1MAXU,             true,  "lll"  },
3360   { "__insn_v1maxui",            TILEGX_INSN_V1MAXUI,            true,  "lll"  },
3361   { "__insn_v1minu",             TILEGX_INSN_V1MINU,             true,  "lll"  },
3362   { "__insn_v1minui",            TILEGX_INSN_V1MINUI,            true,  "lll"  },
3363   { "__insn_v1mnz",              TILEGX_INSN_V1MNZ,              true,  "lll"  },
3364   { "__insn_v1multu",            TILEGX_INSN_V1MULTU,            true,  "lll"  },
3365   { "__insn_v1mulu",             TILEGX_INSN_V1MULU,             true,  "lll"  },
3366   { "__insn_v1mulus",            TILEGX_INSN_V1MULUS,            true,  "lll"  },
3367   { "__insn_v1mz",               TILEGX_INSN_V1MZ,               true,  "lll"  },
3368   { "__insn_v1sadau",            TILEGX_INSN_V1SADAU,            true,  "llll" },
3369   { "__insn_v1sadu",             TILEGX_INSN_V1SADU,             true,  "lll"  },
3370   { "__insn_v1shl",              TILEGX_INSN_V1SHL,              true,  "lll"  },
3371   { "__insn_v1shli",             TILEGX_INSN_V1SHLI,             true,  "lll"  },
3372   { "__insn_v1shrs",             TILEGX_INSN_V1SHRS,             true,  "lll"  },
3373   { "__insn_v1shrsi",            TILEGX_INSN_V1SHRSI,            true,  "lll"  },
3374   { "__insn_v1shru",             TILEGX_INSN_V1SHRU,             true,  "lll"  },
3375   { "__insn_v1shrui",            TILEGX_INSN_V1SHRUI,            true,  "lll"  },
3376   { "__insn_v1sub",              TILEGX_INSN_V1SUB,              true,  "lll"  },
3377   { "__insn_v1subuc",            TILEGX_INSN_V1SUBUC,            true,  "lll"  },
3378   { "__insn_v2add",              TILEGX_INSN_V2ADD,              true,  "lll"  },
3379   { "__insn_v2addi",             TILEGX_INSN_V2ADDI,             true,  "lll"  },
3380   { "__insn_v2addsc",            TILEGX_INSN_V2ADDSC,            true,  "lll"  },
3381   { "__insn_v2adiffs",           TILEGX_INSN_V2ADIFFS,           true,  "lll"  },
3382   { "__insn_v2avgs",             TILEGX_INSN_V2AVGS,             true,  "lll"  },
3383   { "__insn_v2cmpeq",            TILEGX_INSN_V2CMPEQ,            true,  "lll"  },
3384   { "__insn_v2cmpeqi",           TILEGX_INSN_V2CMPEQI,           true,  "lll"  },
3385   { "__insn_v2cmples",           TILEGX_INSN_V2CMPLES,           true,  "lll"  },
3386   { "__insn_v2cmpleu",           TILEGX_INSN_V2CMPLEU,           true,  "lll"  },
3387   { "__insn_v2cmplts",           TILEGX_INSN_V2CMPLTS,           true,  "lll"  },
3388   { "__insn_v2cmpltsi",          TILEGX_INSN_V2CMPLTSI,          true,  "lll"  },
3389   { "__insn_v2cmpltu",           TILEGX_INSN_V2CMPLTU,           true,  "lll"  },
3390   { "__insn_v2cmpltui",          TILEGX_INSN_V2CMPLTUI,          true,  "lll"  },
3391   { "__insn_v2cmpne",            TILEGX_INSN_V2CMPNE,            true,  "lll"  },
3392   { "__insn_v2dotp",             TILEGX_INSN_V2DOTP,             true,  "lll"  },
3393   { "__insn_v2dotpa",            TILEGX_INSN_V2DOTPA,            true,  "llll" },
3394   { "__insn_v2int_h",            TILEGX_INSN_V2INT_H,            true,  "lll"  },
3395   { "__insn_v2int_l",            TILEGX_INSN_V2INT_L,            true,  "lll"  },
3396   { "__insn_v2maxs",             TILEGX_INSN_V2MAXS,             true,  "lll"  },
3397   { "__insn_v2maxsi",            TILEGX_INSN_V2MAXSI,            true,  "lll"  },
3398   { "__insn_v2mins",             TILEGX_INSN_V2MINS,             true,  "lll"  },
3399   { "__insn_v2minsi",            TILEGX_INSN_V2MINSI,            true,  "lll"  },
3400   { "__insn_v2mnz",              TILEGX_INSN_V2MNZ,              true,  "lll"  },
3401   { "__insn_v2mulfsc",           TILEGX_INSN_V2MULFSC,           true,  "lll"  },
3402   { "__insn_v2muls",             TILEGX_INSN_V2MULS,             true,  "lll"  },
3403   { "__insn_v2mults",            TILEGX_INSN_V2MULTS,            true,  "lll"  },
3404   { "__insn_v2mz",               TILEGX_INSN_V2MZ,               true,  "lll"  },
3405   { "__insn_v2packh",            TILEGX_INSN_V2PACKH,            true,  "lll"  },
3406   { "__insn_v2packl",            TILEGX_INSN_V2PACKL,            true,  "lll"  },
3407   { "__insn_v2packuc",           TILEGX_INSN_V2PACKUC,           true,  "lll"  },
3408   { "__insn_v2sadas",            TILEGX_INSN_V2SADAS,            true,  "llll" },
3409   { "__insn_v2sadau",            TILEGX_INSN_V2SADAU,            true,  "llll" },
3410   { "__insn_v2sads",             TILEGX_INSN_V2SADS,             true,  "lll"  },
3411   { "__insn_v2sadu",             TILEGX_INSN_V2SADU,             true,  "lll"  },
3412   { "__insn_v2shl",              TILEGX_INSN_V2SHL,              true,  "lll"  },
3413   { "__insn_v2shli",             TILEGX_INSN_V2SHLI,             true,  "lll"  },
3414   { "__insn_v2shlsc",            TILEGX_INSN_V2SHLSC,            true,  "lll"  },
3415   { "__insn_v2shrs",             TILEGX_INSN_V2SHRS,             true,  "lll"  },
3416   { "__insn_v2shrsi",            TILEGX_INSN_V2SHRSI,            true,  "lll"  },
3417   { "__insn_v2shru",             TILEGX_INSN_V2SHRU,             true,  "lll"  },
3418   { "__insn_v2shrui",            TILEGX_INSN_V2SHRUI,            true,  "lll"  },
3419   { "__insn_v2sub",              TILEGX_INSN_V2SUB,              true,  "lll"  },
3420   { "__insn_v2subsc",            TILEGX_INSN_V2SUBSC,            true,  "lll"  },
3421   { "__insn_v4add",              TILEGX_INSN_V4ADD,              true,  "lll"  },
3422   { "__insn_v4addsc",            TILEGX_INSN_V4ADDSC,            true,  "lll"  },
3423   { "__insn_v4int_h",            TILEGX_INSN_V4INT_H,            true,  "lll"  },
3424   { "__insn_v4int_l",            TILEGX_INSN_V4INT_L,            true,  "lll"  },
3425   { "__insn_v4packsc",           TILEGX_INSN_V4PACKSC,           true,  "lll"  },
3426   { "__insn_v4shl",              TILEGX_INSN_V4SHL,              true,  "lll"  },
3427   { "__insn_v4shlsc",            TILEGX_INSN_V4SHLSC,            true,  "lll"  },
3428   { "__insn_v4shrs",             TILEGX_INSN_V4SHRS,             true,  "lll"  },
3429   { "__insn_v4shru",             TILEGX_INSN_V4SHRU,             true,  "lll"  },
3430   { "__insn_v4sub",              TILEGX_INSN_V4SUB,              true,  "lll"  },
3431   { "__insn_v4subsc",            TILEGX_INSN_V4SUBSC,            true,  "lll"  },
3432   { "__insn_wh64",               TILEGX_INSN_WH64,               false, "vp"   },
3433   { "__insn_xor",                TILEGX_INSN_XOR,                true,  "lll"  },
3434   { "__insn_xori",               TILEGX_INSN_XOR,                true,  "lll"  },
3435   { "__tile_network_barrier",    TILEGX_NETWORK_BARRIER,         false, "v"    },
3436   { "__tile_idn0_receive",       TILEGX_IDN0_RECEIVE,            false, "l"    },
3437   { "__tile_idn1_receive",       TILEGX_IDN1_RECEIVE,            false, "l"    },
3438   { "__tile_idn_send",           TILEGX_IDN_SEND,                false, "vl"   },
3439   { "__tile_udn0_receive",       TILEGX_UDN0_RECEIVE,            false, "l"    },
3440   { "__tile_udn1_receive",       TILEGX_UDN1_RECEIVE,            false, "l"    },
3441   { "__tile_udn2_receive",       TILEGX_UDN2_RECEIVE,            false, "l"    },
3442   { "__tile_udn3_receive",       TILEGX_UDN3_RECEIVE,            false, "l"    },
3443   { "__tile_udn_send",           TILEGX_UDN_SEND,                false, "vl"   },
3444 };
3445 
3446 
3447 /* Convert a character in a builtin type string to a tree type.  */
3448 static tree
char_to_type(char c)3449 char_to_type (char c)
3450 {
3451   static tree volatile_ptr_type_node = NULL;
3452   static tree volatile_const_ptr_type_node = NULL;
3453 
3454   if (volatile_ptr_type_node == NULL)
3455     {
3456       volatile_ptr_type_node =
3457 	build_pointer_type (build_qualified_type (void_type_node,
3458 						  TYPE_QUAL_VOLATILE));
3459       volatile_const_ptr_type_node =
3460 	build_pointer_type (build_qualified_type (void_type_node,
3461 						  TYPE_QUAL_CONST
3462 						  | TYPE_QUAL_VOLATILE));
3463     }
3464 
3465   switch (c)
3466     {
3467     case 'v':
3468       return void_type_node;
3469     case 'i':
3470       return unsigned_type_node;
3471     case 'l':
3472       return long_long_unsigned_type_node;
3473     case 'p':
3474       return volatile_ptr_type_node;
3475     case 'k':
3476       return volatile_const_ptr_type_node;
3477     default:
3478       gcc_unreachable ();
3479     }
3480 }
3481 
3482 
3483 /* Implement TARGET_INIT_BUILTINS.  */
3484 static void
tilegx_init_builtins(void)3485 tilegx_init_builtins (void)
3486 {
3487   size_t i;
3488 
3489   for (i = 0; i < ARRAY_SIZE (tilegx_builtins); i++)
3490     {
3491       const struct tilegx_builtin_def *p = &tilegx_builtins[i];
3492       tree ftype, ret_type, arg_type_list = void_list_node;
3493       tree decl;
3494       int j;
3495 
3496       for (j = strlen (p->type) - 1; j > 0; j--)
3497 	{
3498 	  arg_type_list =
3499 	    tree_cons (NULL_TREE, char_to_type (p->type[j]), arg_type_list);
3500 	}
3501 
3502       ret_type = char_to_type (p->type[0]);
3503 
3504       ftype = build_function_type (ret_type, arg_type_list);
3505 
3506       decl = add_builtin_function (p->name, ftype, p->code, BUILT_IN_MD,
3507 				   NULL, NULL);
3508 
3509       if (p->is_const)
3510 	TREE_READONLY (decl) = 1;
3511       TREE_NOTHROW (decl) = 1;
3512 
3513       if (tilegx_builtin_info[p->code].fndecl == NULL)
3514 	tilegx_builtin_info[p->code].fndecl = decl;
3515     }
3516 }
3517 
3518 
/* Implement TARGET_EXPAND_BUILTIN.  Expand the builtin call EXP into
   RTL, placing the result in TARGET when possible.  Returns the rtx
   holding the result for a value-returning builtin, const0_rtx for a
   void builtin, or NULL_RTX / const0_rtx on error.  */
static rtx
tilegx_expand_builtin (tree exp,
		       rtx target,
		       rtx subtarget ATTRIBUTE_UNUSED,
		       machine_mode mode ATTRIBUTE_UNUSED,
		       int ignore ATTRIBUTE_UNUSED)
{
#define MAX_BUILTIN_ARGS 4

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arg;
  call_expr_arg_iterator iter;
  enum insn_code icode;
  rtx op[MAX_BUILTIN_ARGS + 1], pat;
  int opnum;
  bool nonvoid;
  insn_gen_fn fn;

  /* Look up the insn pattern registered for this builtin.  */
  if (fcode >= TILEGX_BUILTIN_max)
    internal_error ("bad builtin fcode");
  icode = tilegx_builtin_info[fcode].icode;
  if (icode == 0)
    internal_error ("bad builtin icode");

  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  /* For a value-returning builtin, operand 0 of the insn is the
     destination, so the first argument lands in operand 1.  */
  opnum = nonvoid;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      const struct insn_operand_data *insn_op;

      if (arg == error_mark_node)
	return NULL_RTX;
      if (opnum > MAX_BUILTIN_ARGS)
	return NULL_RTX;

      insn_op = &insn_data[icode].operand[opnum];

      op[opnum] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      /* If the expanded argument doesn't satisfy the operand's
	 predicate, try forcing it into a register of the right
	 mode.  */
      if (!(*insn_op->predicate) (op[opnum], insn_op->mode))
	{
	  machine_mode opmode = insn_op->mode;

	  /* pointer_operand and pmode_register_operand operands do
	     not specify a mode, so use the operand's mode instead
	     (which should always be right by the time we get here,
	     except for constants, which are VOIDmode).  */
	  if (opmode == VOIDmode)
	    {
	      machine_mode m = GET_MODE (op[opnum]);
	      gcc_assert (m == Pmode || m == VOIDmode);
	      opmode = Pmode;
	    }

	  op[opnum] = copy_to_mode_reg (opmode, op[opnum]);
	}

      if (!(*insn_op->predicate) (op[opnum], insn_op->mode))
	{
	  /* We still failed to meet the predicate even after moving
	     into a register. Assume we needed an immediate.  */
	  error_at (EXPR_LOCATION (exp),
		    "operand must be an immediate of the right size");
	  return const0_rtx;
	}

      opnum++;
    }

  /* Set up operand 0 (the result) for value-returning builtins,
     reusing TARGET when its mode and predicate allow.  */
  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	{
	  if (tmode == VOIDmode)
	    {
	      /* get the mode from the return type.  */
	      tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl)));
	    }
	  target = gen_reg_rtx (tmode);
	}
      op[0] = target;
    }

  /* Invoke the pattern's generator with however many operands we
     collected (result operand included in the count).  */
  fn = GEN_FCN (icode);
  switch (opnum)
    {
    case 0:
      pat = fn (NULL_RTX);
      break;
    case 1:
      pat = fn (op[0]);
      break;
    case 2:
      pat = fn (op[0], op[1]);
      break;
    case 3:
      pat = fn (op[0], op[1], op[2]);
      break;
    case 4:
      pat = fn (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      pat = fn (op[0], op[1], op[2], op[3], op[4]);
      break;
    default:
      gcc_unreachable ();
    }
  if (!pat)
    return NULL_RTX;

  /* If we are generating a prefetch, tell the scheduler not to move
     it around.  */
  if (GET_CODE (pat) == PREFETCH)
    PREFETCH_SCHEDULE_BARRIER_P (pat) = true;

  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}
3647 
3648 
3649 /* Implement TARGET_BUILTIN_DECL.  */
3650 static tree
tilegx_builtin_decl(unsigned code,bool initialize_p ATTRIBUTE_UNUSED)3651 tilegx_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
3652 {
3653   if (code >= TILEGX_BUILTIN_max)
3654     return error_mark_node;
3655 
3656   return tilegx_builtin_info[code].fndecl;
3657 }
3658 
3659 
3660 
3661 /* Stack frames  */
3662 
3663 /* Return whether REGNO needs to be saved in the stack frame.  */
3664 static bool
need_to_save_reg(unsigned int regno)3665 need_to_save_reg (unsigned int regno)
3666 {
3667   if (!fixed_regs[regno] && !call_used_regs[regno]
3668       && df_regs_ever_live_p (regno))
3669     return true;
3670 
3671   if (flag_pic
3672       && (regno == PIC_OFFSET_TABLE_REGNUM
3673 	  || regno == TILEGX_PIC_TEXT_LABEL_REGNUM)
3674       && (crtl->uses_pic_offset_table || crtl->saves_all_registers))
3675     return true;
3676 
3677   if (crtl->calls_eh_return)
3678     {
3679       unsigned i;
3680       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; i++)
3681 	{
3682 	  if (regno == EH_RETURN_DATA_REGNO (i))
3683 	    return true;
3684 	}
3685     }
3686 
3687   return false;
3688 }
3689 
3690 
3691 /* Return the size of the register savev area.  This function is only
3692    correct starting with local register allocation */
3693 static int
tilegx_saved_regs_size(void)3694 tilegx_saved_regs_size (void)
3695 {
3696   int reg_save_size = 0;
3697   int regno;
3698   int offset_to_frame;
3699   int align_mask;
3700 
3701   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3702     if (need_to_save_reg (regno))
3703       reg_save_size += UNITS_PER_WORD;
3704 
3705   /* Pad out the register save area if necessary to make
3706      frame_pointer_rtx be as aligned as the stack pointer.  */
3707   offset_to_frame = crtl->args.pretend_args_size + reg_save_size;
3708   align_mask = (STACK_BOUNDARY / BITS_PER_UNIT) - 1;
3709   reg_save_size += (-offset_to_frame) & align_mask;
3710 
3711   return reg_save_size;
3712 }
3713 
3714 
3715 /* Round up frame size SIZE.  */
3716 static int
round_frame_size(int size)3717 round_frame_size (int size)
3718 {
3719   return ((size + STACK_BOUNDARY / BITS_PER_UNIT - 1)
3720 	  & -STACK_BOUNDARY / BITS_PER_UNIT);
3721 }
3722 
3723 
3724 /* Emit a store in the stack frame to save REGNO at address ADDR, and
3725    emit the corresponding REG_CFA_OFFSET note described by CFA and
3726    CFA_OFFSET.  Return the emitted insn.  */
3727 static rtx
frame_emit_store(int regno,int regno_note,rtx addr,rtx cfa,int cfa_offset)3728 frame_emit_store (int regno, int regno_note, rtx addr, rtx cfa,
3729 		  int cfa_offset)
3730 {
3731   rtx reg = gen_rtx_REG (DImode, regno);
3732   rtx mem = gen_frame_mem (DImode, addr);
3733   rtx mov = gen_movdi (mem, reg);
3734 
3735   /* Describe what just happened in a way that dwarf understands.  We
3736      use temporary registers to hold the address to make scheduling
3737      easier, and use the REG_CFA_OFFSET to describe the address as an
3738      offset from the CFA.  */
3739   rtx reg_note = gen_rtx_REG (DImode, regno_note);
3740   rtx cfa_relative_addr = gen_rtx_PLUS (Pmode, cfa, GEN_INT (cfa_offset));
3741   rtx cfa_relative_mem = gen_frame_mem (DImode, cfa_relative_addr);
3742   rtx real = gen_rtx_SET (cfa_relative_mem, reg_note);
3743   add_reg_note (mov, REG_CFA_OFFSET, real);
3744 
3745   return emit_insn (mov);
3746 }
3747 
3748 
3749 /* Emit a load in the stack frame to load REGNO from address ADDR.
3750    Add a REG_CFA_RESTORE note to CFA_RESTORES if CFA_RESTORES is
3751    non-null.  Return the emitted insn.  */
3752 static rtx_insn *
frame_emit_load(int regno,rtx addr,rtx * cfa_restores)3753 frame_emit_load (int regno, rtx addr, rtx *cfa_restores)
3754 {
3755   rtx reg = gen_rtx_REG (DImode, regno);
3756   rtx mem = gen_frame_mem (DImode, addr);
3757   if (cfa_restores)
3758     *cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, *cfa_restores);
3759   return emit_insn (gen_movdi (reg, mem));
3760 }
3761 
3762 
/* Helper function to set RTX_FRAME_RELATED_P on instructions,
   including sequences.  Used via the FRP macro: the caller starts a
   sequence, emits into it, then this function closes the sequence,
   marks everything in it frame-related, and re-emits it into the
   enclosing instruction stream.  Returns the emitted insn (or
   NULL_RTX if the sequence was empty).  */
static rtx
set_frame_related_p (void)
{
  /* Grab everything emitted since the matching start_sequence ().  */
  rtx_insn *seq = get_insns ();
  rtx_insn *insn;

  end_sequence ();

  if (!seq)
    return NULL_RTX;

  if (INSN_P (seq))
    {
      /* SEQ is a chain of real insns: mark each one individually,
	 then emit the whole chain.  */
      insn = seq;
      while (insn != NULL_RTX)
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = NEXT_INSN (insn);
	}
      seq = emit_insn (seq);
    }
  else
    {
      /* Not a plain insn chain (e.g. a SEQUENCE); emit first and
	 mark the resulting insn.  */
      seq = emit_insn (seq);
      RTX_FRAME_RELATED_P (seq) = 1;
    }
  return seq;
}
3793 
3794 
3795 #define FRP(exp)  (start_sequence (), exp, set_frame_related_p ())
3796 
3797 /* This emits code for 'sp += offset'.
3798 
3799    The ABI only allows us to modify 'sp' in a single 'addi' or
3800    'addli', so the backtracer understands it. Larger amounts cannot
3801    use those instructions, so are added by placing the offset into a
3802    large register and using 'add'.
3803 
3804    This happens after reload, so we need to expand it ourselves.  */
3805 static rtx_insn *
emit_sp_adjust(int offset,int * next_scratch_regno,bool frame_related,rtx reg_notes)3806 emit_sp_adjust (int offset, int *next_scratch_regno, bool frame_related,
3807 		rtx reg_notes)
3808 {
3809   rtx to_add;
3810   rtx imm_rtx = GEN_INT (offset);
3811   rtx pat;
3812   rtx_insn *insn;
3813 
3814   if (satisfies_constraint_J (imm_rtx))
3815     {
3816       /* We can add this using a single immediate add.  */
3817       to_add = imm_rtx;
3818     }
3819   else
3820     {
3821       rtx tmp = gen_rtx_REG (Pmode, (*next_scratch_regno)--);
3822       tilegx_expand_set_const64 (tmp, imm_rtx);
3823       to_add = tmp;
3824     }
3825 
3826   /* Actually adjust the stack pointer.  */
3827   if (TARGET_32BIT)
3828     pat = gen_sp_adjust_32bit (stack_pointer_rtx, stack_pointer_rtx, to_add);
3829   else
3830     pat = gen_sp_adjust (stack_pointer_rtx, stack_pointer_rtx, to_add);
3831 
3832   insn = emit_insn (pat);
3833   REG_NOTES (insn) = reg_notes;
3834 
3835   /* Describe what just happened in a way that dwarf understands.  */
3836   if (frame_related)
3837     {
3838       rtx real = gen_rtx_SET (stack_pointer_rtx,
3839 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
3840 					    imm_rtx));
3841       RTX_FRAME_RELATED_P (insn) = 1;
3842       add_reg_note (insn, REG_CFA_ADJUST_CFA, real);
3843     }
3844 
3845   return insn;
3846 }
3847 
3848 
3849 /* Return whether the current function is leaf.  This takes into
3850    account whether the function calls tls_get_addr.  */
3851 static bool
tilegx_current_function_is_leaf(void)3852 tilegx_current_function_is_leaf (void)
3853 {
3854   return crtl->is_leaf && !cfun->machine->calls_tls_get_addr;
3855 }
3856 
3857 
3858 /* Return the frame size.  */
3859 static int
compute_total_frame_size(void)3860 compute_total_frame_size (void)
3861 {
3862   int total_size = (get_frame_size () + tilegx_saved_regs_size ()
3863 		    + crtl->outgoing_args_size
3864 		    + crtl->args.pretend_args_size);
3865 
3866   if (!tilegx_current_function_is_leaf () || cfun->calls_alloca)
3867     {
3868       /* Make room for save area in callee.  */
3869       total_size += STACK_POINTER_OFFSET;
3870     }
3871 
3872   return round_frame_size (total_size);
3873 }
3874 
3875 
3876 /* Return nonzero if this function is known to have a null epilogue.
3877    This allows the optimizer to omit jumps to jumps if no stack was
3878    created.  */
3879 bool
tilegx_can_use_return_insn_p(void)3880 tilegx_can_use_return_insn_p (void)
3881 {
3882   return (reload_completed
3883 	  && cfun->static_chain_decl == 0
3884 	  && compute_total_frame_size () == 0
3885 	  && tilegx_current_function_is_leaf ()
3886 	  && !crtl->profile && !df_regs_ever_live_p (TILEGX_LINK_REGNUM));
3887 }
3888 
3889 
3890 /* Returns an rtx for a stack slot at 'FP + offset_from_fp'.  If there
3891    is a frame pointer, it computes the value relative to
3892    that. Otherwise it uses the stack pointer.  */
3893 static rtx
compute_frame_addr(int offset_from_fp,int * next_scratch_regno)3894 compute_frame_addr (int offset_from_fp, int *next_scratch_regno)
3895 {
3896   rtx base_reg_rtx, tmp_reg_rtx, offset_rtx;
3897   int offset_from_base;
3898 
3899   if (frame_pointer_needed)
3900     {
3901       base_reg_rtx = hard_frame_pointer_rtx;
3902       offset_from_base = offset_from_fp;
3903     }
3904   else
3905     {
3906       int offset_from_sp = compute_total_frame_size () + offset_from_fp;
3907       offset_from_base = offset_from_sp;
3908       base_reg_rtx = stack_pointer_rtx;
3909     }
3910 
3911   if (offset_from_base == 0)
3912     return base_reg_rtx;
3913 
3914   /* Compute the new value of the stack pointer.  */
3915   tmp_reg_rtx = gen_rtx_REG (Pmode, (*next_scratch_regno)--);
3916   offset_rtx = GEN_INT (offset_from_base);
3917 
3918   if (!add_operand (offset_rtx, Pmode))
3919     {
3920       expand_set_cint64 (tmp_reg_rtx, offset_rtx);
3921       offset_rtx = tmp_reg_rtx;
3922     }
3923 
3924   emit_insn (gen_rtx_SET (tmp_reg_rtx,
3925 			  gen_rtx_PLUS (Pmode, base_reg_rtx, offset_rtx)));
3926 
3927   return tmp_reg_rtx;
3928 }
3929 
3930 
3931 /* The stack frame looks like this:
3932          +-------------+
3933          |    ...      |
3934          |  incoming   |
3935          | stack args  |
3936    AP -> +-------------+
3937          | caller's HFP|
3938          +-------------+
3939          | lr save     |
3940   HFP -> +-------------+
3941          |  var args   |
3942          |  reg save   | crtl->args.pretend_args_size bytes
3943          +-------------+
3944          |    ...      |
3945          | saved regs  | tilegx_saved_regs_size() bytes
3946    FP -> +-------------+
3947          |    ...      |
3948          |   vars      | get_frame_size() bytes
3949          +-------------+
3950          |    ...      |
3951          |  outgoing   |
3952          |  stack args | crtl->outgoing_args_size bytes
3953          +-------------+
3954          | HFP         | ptr_size bytes (only here if nonleaf / alloca)
3955          +-------------+
3956          | callee lr   | ptr_size bytes (only here if nonleaf / alloca)
3957          | save        |
3958    SP -> +-------------+
3959 
3960   HFP == incoming SP.
3961 
3962   For functions with a frame larger than 32767 bytes, or which use
3963   alloca (), r52 is used as a frame pointer.  Otherwise there is no
3964   frame pointer.
3965 
3966   FP is saved at SP+ptr_size before calling a subroutine so the callee
3967   can chain.  */
3968 void
tilegx_expand_prologue(void)3969 tilegx_expand_prologue (void)
3970 {
3971 #define ROUND_ROBIN_SIZE 4
3972   /* We round-robin through four scratch registers to hold temporary
3973      addresses for saving registers, to make instruction scheduling
3974      easier.  */
3975   rtx reg_save_addr[ROUND_ROBIN_SIZE] = {
3976     NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX
3977   };
3978   rtx insn, cfa;
3979   unsigned int which_scratch;
3980   int offset, start_offset, regno;
3981 
3982   /* A register that holds a copy of the incoming fp.  */
3983   int fp_copy_regno = -1;
3984 
3985   /* A register that holds a copy of the incoming sp.  */
3986   int sp_copy_regno = -1;
3987 
3988   /* Next scratch register number to hand out (postdecrementing).  */
3989   int next_scratch_regno = 29;
3990 
3991   int total_size = compute_total_frame_size ();
3992 
3993   if (flag_stack_usage_info)
3994     current_function_static_stack_size = total_size;
3995 
3996   /* Save lr first in its special location because code after this
3997      might use the link register as a scratch register.  */
3998   if (df_regs_ever_live_p (TILEGX_LINK_REGNUM) || crtl->calls_eh_return)
3999     {
4000       FRP (frame_emit_store (TILEGX_LINK_REGNUM, TILEGX_LINK_REGNUM,
4001 			     stack_pointer_rtx, stack_pointer_rtx, 0));
4002       emit_insn (gen_blockage ());
4003     }
4004 
4005   if (total_size == 0)
4006     {
4007       /* Load the PIC register if needed.  */
4008       if (flag_pic && crtl->uses_pic_offset_table)
4009 	load_pic_register (false);
4010 
4011       return;
4012     }
4013 
4014   cfa = stack_pointer_rtx;
4015 
4016   if (frame_pointer_needed)
4017     {
4018       fp_copy_regno = next_scratch_regno--;
4019 
4020       /* Copy the old frame pointer aside so we can save it later.  */
4021       insn =
4022 	FRP (emit_move_insn (gen_rtx_REG (word_mode, fp_copy_regno),
4023 			     gen_lowpart (word_mode, hard_frame_pointer_rtx)));
4024       add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
4025 
4026       /* Set up the frame pointer.  */
4027       insn = FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
4028       add_reg_note (insn, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
4029       cfa = hard_frame_pointer_rtx;
4030       REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
4031 
4032       /* fp holds a copy of the incoming sp, in case we need to store
4033 	 it.  */
4034       sp_copy_regno = HARD_FRAME_POINTER_REGNUM;
4035     }
4036   else if (!tilegx_current_function_is_leaf ())
4037     {
4038       /* Copy the old stack pointer aside so we can save it later.  */
4039       sp_copy_regno = next_scratch_regno--;
4040       emit_move_insn (gen_rtx_REG (Pmode, sp_copy_regno),
4041 		      stack_pointer_rtx);
4042     }
4043 
4044   if (tilegx_current_function_is_leaf ())
4045     {
4046       /* No need to store chain pointer to caller's frame.  */
4047       emit_sp_adjust (-total_size, &next_scratch_regno,
4048 		      !frame_pointer_needed, NULL_RTX);
4049     }
4050   else
4051     {
4052       /* Save the frame pointer (incoming sp value) to support
4053          backtracing.  First we need to create an rtx with the store
4054          address.  */
4055       rtx chain_addr = gen_rtx_REG (Pmode, next_scratch_regno--);
4056       rtx size_rtx = GEN_INT (-(total_size - UNITS_PER_WORD));
4057 
4058       if (add_operand (size_rtx, Pmode))
4059 	{
4060 	  /* Expose more parallelism by computing this value from the
4061 	     original stack pointer, not the one after we have pushed
4062 	     the frame.  */
4063 	  rtx p = gen_rtx_PLUS (Pmode, stack_pointer_rtx, size_rtx);
4064 	  emit_insn (gen_rtx_SET (chain_addr, p));
4065 	  emit_sp_adjust (-total_size, &next_scratch_regno,
4066 			  !frame_pointer_needed, NULL_RTX);
4067 	}
4068       else
4069 	{
4070 	  /* The stack frame is large, so just store the incoming sp
4071 	     value at *(new_sp + UNITS_PER_WORD).  */
4072 	  rtx p;
4073 	  emit_sp_adjust (-total_size, &next_scratch_regno,
4074 			  !frame_pointer_needed, NULL_RTX);
4075 	  p = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
4076 			    GEN_INT (UNITS_PER_WORD));
4077 	  emit_insn (gen_rtx_SET (chain_addr, p));
4078 	}
4079 
4080       /* Save our frame pointer for backtrace chaining.  */
4081       emit_insn (gen_movdi (gen_frame_mem (DImode, chain_addr),
4082 			    gen_rtx_REG (DImode, sp_copy_regno)));
4083     }
4084 
4085   /* Compute where to start storing registers we need to save.  */
4086   start_offset = -crtl->args.pretend_args_size - UNITS_PER_WORD;
4087   offset = start_offset;
4088 
4089   /* Store all registers that need saving.  */
4090   which_scratch = 0;
4091   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4092     if (need_to_save_reg (regno))
4093       {
4094 	rtx r = reg_save_addr[which_scratch];
4095 	int from_regno;
4096 	int cfa_offset = frame_pointer_needed ? offset : total_size + offset;
4097 
4098 	if (r == NULL_RTX)
4099 	  {
4100 	    int prev_scratch_regno = next_scratch_regno;
4101 	    r = compute_frame_addr (offset, &next_scratch_regno);
4102 	    if (prev_scratch_regno != next_scratch_regno)
4103 	      reg_save_addr[which_scratch] = r;
4104 	  }
4105 	else
4106 	  {
4107 	    /* Advance to the next stack slot to store this
4108 	       register.  */
4109 	    int stride = ROUND_ROBIN_SIZE * -UNITS_PER_WORD;
4110 	    rtx p = gen_rtx_PLUS (Pmode, r, GEN_INT (stride));
4111 	    emit_insn (gen_rtx_SET (r, p));
4112 	  }
4113 
4114 	/* Save this register to the stack (but use the old fp value
4115 	   we copied aside if appropriate).  */
4116 	from_regno =
4117 	  (fp_copy_regno >= 0 && regno == HARD_FRAME_POINTER_REGNUM)
4118 	  ? fp_copy_regno : regno;
4119 	FRP (frame_emit_store (from_regno, regno, r, cfa, cfa_offset));
4120 
4121 	offset -= UNITS_PER_WORD;
4122 	which_scratch = (which_scratch + 1) % ROUND_ROBIN_SIZE;
4123       }
4124 
4125   /* If profiling, force that to happen after the frame is set up.  */
4126   if (crtl->profile)
4127     emit_insn (gen_blockage ());
4128 
4129   /* Load the PIC register if needed.  */
4130   if (flag_pic && crtl->uses_pic_offset_table)
4131     load_pic_register (false);
4132 }
4133 
4134 
/* Implement the epilogue and sibcall_epilogue patterns.  SIBCALL_P is
   true for a sibcall_epilogue pattern, and false for an epilogue
   pattern.  */
void
tilegx_expand_epilogue (bool sibcall_p)
{
  /* We round-robin through four scratch registers to hold temporary
     addresses for saving registers, to make instruction scheduling
     easier.  */
  rtx reg_save_addr[ROUND_ROBIN_SIZE] = {
    NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX
  };
  rtx_insn *last_insn, *insn;
  unsigned int which_scratch;
  int offset, start_offset, regno;
  /* REG_CFA_RESTORE notes accumulated here and later attached to the
     insn that pops the frame (or the lr reload for a zero-size frame).  */
  rtx cfa_restores = NULL_RTX;

  /* A register that holds a copy of the incoming fp.  */
  int fp_copy_regno = -1;

  /* Next scratch register number to hand out (postdecrementing).  */
  int next_scratch_regno = 29;

  int total_size = compute_total_frame_size ();

  /* Remember where the epilogue starts; every insn emitted after this
     point is marked frame-related in the loop at the end.  */
  last_insn = get_last_insn ();

  /* Load lr first since we are going to need it first.  */
  insn = NULL;
  if (df_regs_ever_live_p (TILEGX_LINK_REGNUM))
    {
      insn = frame_emit_load (TILEGX_LINK_REGNUM,
			      compute_frame_addr (0, &next_scratch_regno),
			      &cfa_restores);
    }

  if (total_size == 0)
    {
      /* No frame to pop; attach the CFI notes for the lr reload (if
	 any) and go straight to the return.  */
      if (insn)
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  REG_NOTES (insn) = cfa_restores;
	}
      goto done;
    }

  /* Compute where to start restoring registers.  */
  start_offset = -crtl->args.pretend_args_size - UNITS_PER_WORD;
  offset = start_offset;

  if (frame_pointer_needed)
    fp_copy_regno = next_scratch_regno--;

  /* Restore all callee-saved registers.  */
  which_scratch = 0;
  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (need_to_save_reg (regno))
      {
	rtx r = reg_save_addr[which_scratch];
	if (r == NULL_RTX)
	  {
	    /* First use of this round-robin slot: materialize the
	       frame address in a scratch register.  */
	    r = compute_frame_addr (offset, &next_scratch_regno);
	    reg_save_addr[which_scratch] = r;
	  }
	else
	  {
	    /* Advance to the next stack slot to store this register.  */
	    int stride = ROUND_ROBIN_SIZE * -UNITS_PER_WORD;
	    rtx p = gen_rtx_PLUS (Pmode, r, GEN_INT (stride));
	    emit_insn (gen_rtx_SET (r, p));
	  }

	/* The frame pointer is reloaded into a scratch register here
	   and only copied into fp at the very end, once fp is no
	   longer needed to address the frame.  No REG_CFA_RESTORE is
	   recorded for the scratch copy.  */
	if (fp_copy_regno >= 0 && regno == HARD_FRAME_POINTER_REGNUM)
	  frame_emit_load (fp_copy_regno, r, NULL);
	else
	  frame_emit_load (regno, r, &cfa_restores);

	offset -= UNITS_PER_WORD;
	which_scratch = (which_scratch + 1) % ROUND_ROBIN_SIZE;
      }

  if (!tilegx_current_function_is_leaf ())
    cfa_restores =
      alloc_reg_note (REG_CFA_RESTORE, stack_pointer_rtx, cfa_restores);

  /* Keep the register restores above from being scheduled past the
     stack pointer adjustment below.  */
  emit_insn (gen_blockage ());

  if (frame_pointer_needed)
    {
      /* Restore the old stack pointer by copying from the frame
	 pointer.  */
      if (TARGET_32BIT)
	{
	  insn = emit_insn (gen_sp_restore_32bit (stack_pointer_rtx,
						  hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_sp_restore (stack_pointer_rtx,
					    hard_frame_pointer_rtx));
	}
      RTX_FRAME_RELATED_P (insn) = 1;
      REG_NOTES (insn) = cfa_restores;
      add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
    }
  else
    {
      /* Pop the whole frame; the accumulated CFA-restore notes ride
	 on this adjustment.  */
      insn = emit_sp_adjust (total_size, &next_scratch_regno, true,
			     cfa_restores);
    }

  /* Apply the extra stack adjustment requested by eh_return.  */
  if (crtl->calls_eh_return)
    {
      if (TARGET_32BIT)
	emit_insn (gen_sp_adjust_32bit (stack_pointer_rtx, stack_pointer_rtx,
					EH_RETURN_STACKADJ_RTX));
      else
	emit_insn (gen_sp_adjust (stack_pointer_rtx, stack_pointer_rtx,
				  EH_RETURN_STACKADJ_RTX));
    }

  /* Restore the old frame pointer.  */
  if (frame_pointer_needed)
    {
      insn = emit_move_insn (gen_lowpart (DImode, hard_frame_pointer_rtx),
			     gen_rtx_REG (DImode, fp_copy_regno));
      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
    }

  /* Mark the pic registers as live outside of the function.  */
  if (flag_pic)
    {
      emit_use (cfun->machine->text_label_rtx);
      emit_use (cfun->machine->got_rtx);
    }

done:
  if (!sibcall_p)
    {
      emit_jump_insn (gen__return ());
    }
  else
    {
      /* For a sibcall, keep the return address in lr live through
	 the tail call.  */
      emit_use (gen_rtx_REG (Pmode, TILEGX_LINK_REGNUM));
    }

  /* Mark all insns we just emitted as frame-related.  */
  for (; last_insn != NULL_RTX; last_insn = next_insn (last_insn))
    RTX_FRAME_RELATED_P (last_insn) = 1;
}
4285 
4286 #undef ROUND_ROBIN_SIZE
4287 
4288 
4289 /* Implement INITIAL_ELIMINATION_OFFSET.  */
4290 int
tilegx_initial_elimination_offset(int from,int to)4291 tilegx_initial_elimination_offset (int from, int to)
4292 {
4293   int total_size = compute_total_frame_size ();
4294 
4295   if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4296     {
4297       return (total_size - crtl->args.pretend_args_size
4298 	      - tilegx_saved_regs_size ());
4299     }
4300   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4301     {
4302       return -(crtl->args.pretend_args_size + tilegx_saved_regs_size ());
4303     }
4304   else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4305     {
4306       return STACK_POINTER_OFFSET + total_size;
4307     }
4308   else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4309     {
4310       return STACK_POINTER_OFFSET;
4311     }
4312   else
4313     gcc_unreachable ();
4314 }
4315 
4316 
4317 /* Return an RTX indicating where the return address to the calling
4318    function can be found.  */
4319 rtx
tilegx_return_addr(int count,rtx frame ATTRIBUTE_UNUSED)4320 tilegx_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4321 {
4322   if (count != 0)
4323     return const0_rtx;
4324 
4325   return get_hard_reg_initial_val (Pmode, TILEGX_LINK_REGNUM);
4326 }
4327 
4328 
4329 /* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile to
4330    prevent it from being deleted.  */
4331 rtx
tilegx_eh_return_handler_rtx(void)4332 tilegx_eh_return_handler_rtx (void)
4333 {
4334   rtx tmp = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
4335   MEM_VOLATILE_P (tmp) = true;
4336   return tmp;
4337 }
4338 
4339 
4340 
4341 /* Registers  */
4342 
4343 /* Implemnet TARGET_CONDITIONAL_REGISTER_USAGE.  */
4344 static void
tilegx_conditional_register_usage(void)4345 tilegx_conditional_register_usage (void)
4346 {
4347   global_regs[TILEGX_NETORDER_REGNUM] = 1;
4348   /* TILEGX_PIC_TEXT_LABEL_REGNUM is conditionally used.  It is a
4349      member of fixed_regs, and therefore must be member of
4350      call_used_regs, but it is not a member of call_really_used_regs[]
4351      because it is not clobbered by a call.  */
4352   if (TILEGX_PIC_TEXT_LABEL_REGNUM != INVALID_REGNUM)
4353     {
4354       fixed_regs[TILEGX_PIC_TEXT_LABEL_REGNUM] = 1;
4355       call_used_regs[TILEGX_PIC_TEXT_LABEL_REGNUM] = 1;
4356     }
4357   if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
4358     {
4359       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4360       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4361     }
4362 }
4363 
4364 
4365 /* Implement TARGET_FRAME_POINTER_REQUIRED.  */
4366 static bool
tilegx_frame_pointer_required(void)4367 tilegx_frame_pointer_required (void)
4368 {
4369   return crtl->calls_eh_return || cfun->calls_alloca;
4370 }
4371 
4372 
4373 
4374 /* Scheduling and reorg  */
4375 
4376 /* Return the length of INSN.  LENGTH is the initial length computed
4377    by attributes in the machine-description file.  This is where we
4378    account for bundles.  */
4379 int
tilegx_adjust_insn_length(rtx_insn * insn,int length)4380 tilegx_adjust_insn_length (rtx_insn *insn, int length)
4381 {
4382   machine_mode mode = GET_MODE (insn);
4383 
4384   /* A non-termininating instruction in a bundle has length 0.  */
4385   if (mode == SImode)
4386     return 0;
4387 
4388   /* By default, there is not length adjustment.  */
4389   return length;
4390 }
4391 
4392 
/* Implement TARGET_SCHED_ISSUE_RATE.  */
static int
tilegx_issue_rate (void)
{
  /* Up to three instructions per cycle, matching the
     three-instruction bundles formed by tilegx_gen_bundles.  */
  return 3;
}
4399 
4400 
4401 /* Return the rtx for the jump target.  */
4402 static rtx
get_jump_target(rtx branch)4403 get_jump_target (rtx branch)
4404 {
4405   if (CALL_P (branch))
4406     {
4407       rtx call;
4408       call = PATTERN (branch);
4409 
4410       if (GET_CODE (call) == PARALLEL)
4411 	call = XVECEXP (call, 0, 0);
4412 
4413       if (GET_CODE (call) == SET)
4414 	call = SET_SRC (call);
4415 
4416       if (GET_CODE (call) == CALL)
4417 	return XEXP (XEXP (call, 0), 0);
4418     }
4419   return 0;
4420 }
4421 
4422 
4423 /* Implement TARGET_SCHED_ADJUST_COST.  */
4424 static int
tilegx_sched_adjust_cost(rtx_insn * insn,rtx link,rtx_insn * dep_insn,int cost)4425 tilegx_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
4426 			  int cost)
4427 {
4428   /* If we have a true dependence, INSN is a call, and DEP_INSN
4429      defines a register that is needed by the call (argument or stack
4430      pointer) , set its latency to 0 so that it can be bundled with
4431      the call.  Explicitly check for and exclude the case when
4432      DEP_INSN defines the target of the jump.  */
4433   if (CALL_P (insn) && REG_NOTE_KIND (link) == REG_DEP_TRUE)
4434     {
4435       rtx target = get_jump_target (insn);
4436       if (!REG_P (target) || !set_of (target, dep_insn))
4437 	return 0;
4438     }
4439 
4440   return cost;
4441 }
4442 
4443 
4444 /* Skip over irrelevant NOTEs and such and look for the next insn we
4445    would consider bundling.  */
4446 static rtx_insn *
next_insn_to_bundle(rtx_insn * r,rtx_insn * end)4447 next_insn_to_bundle (rtx_insn *r, rtx_insn *end)
4448 {
4449   for (; r != end; r = NEXT_INSN (r))
4450     {
4451       if (NONDEBUG_INSN_P (r)
4452 	  && GET_CODE (PATTERN (r)) != USE
4453 	  && GET_CODE (PATTERN (r)) != CLOBBER)
4454 	return r;
4455     }
4456 
4457   return NULL;
4458 }
4459 
4460 
/* Go through all insns, and use the information generated during
   scheduling to generate SEQUENCEs to represent bundles of
   instructions issued simultaneously.  Each insn's machine mode is
   reused as a bundling mark: QImode ends a bundle, SImode is an
   interior (non-terminating) member.  */
static void
tilegx_gen_bundles (void)
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn, *next, *prev;
      rtx_insn *end = NEXT_INSN (BB_END (bb));

      /* PREV tracks the previous bundling candidate that was kept;
	 deleted barrier insns intentionally do not update it.  */
      prev = NULL;
      for (insn = next_insn_to_bundle (BB_HEAD (bb), end); insn; insn = next)
	{
	  next = next_insn_to_bundle (NEXT_INSN (insn), end);

	  /* Never wrap {} around inline asm.  */
	  if (GET_CODE (PATTERN (insn)) != ASM_INPUT)
	    {
	      /* TImode on NEXT marks the start of a new issue group
		 (scheduler convention), so INSN ends the current
		 bundle.  */
	      if (next == NULL_RTX || GET_MODE (next) == TImode
		  /* NOTE: The scheduler incorrectly believes a call
		     insn can execute in the same cycle as the insn
		     after the call.  This is of course impossible.
		     Really we need to fix the scheduler somehow, so
		     the code after the call gets scheduled
		     optimally.  */
		  || CALL_P (insn))
		{
		  /* Mark current insn as the end of a bundle.  */
		  PUT_MODE (insn, QImode);
		}
	      else
		{
		  /* Mark it as part of a bundle.  */
		  PUT_MODE (insn, SImode);
		}
	    }

	  /* Delete barrier insns, because they can mess up the
	     emitting of bundle braces.  If it is end-of-bundle, then
	     the previous insn must be marked end-of-bundle.  */
	  if (get_attr_type (insn) == TYPE_NOTHING) {
	    if (GET_MODE (insn) == QImode && prev != NULL
		&& GET_MODE (prev) == SImode)
	      {
		PUT_MODE (prev, QImode);
	      }
	    delete_insn (insn);

            // Note: prev remains the same for next iteration.
	  }
          else
            prev = insn;
	}
    }
}
4518 
4519 
4520 /* Replace OLD_INSN with NEW_INSN.  */
4521 static void
replace_insns(rtx_insn * old_insn,rtx_insn * new_insns)4522 replace_insns (rtx_insn *old_insn, rtx_insn *new_insns)
4523 {
4524   if (new_insns)
4525     emit_insn_before (new_insns, old_insn);
4526 
4527   delete_insn (old_insn);
4528 }
4529 
4530 
4531 /* Returns true if INSN is the first instruction of a pc-relative
4532    address compuatation.  */
4533 static bool
match_pcrel_step1(rtx insn)4534 match_pcrel_step1 (rtx insn)
4535 {
4536   rtx pattern = PATTERN (insn);
4537   rtx src;
4538 
4539   if (GET_CODE (pattern) != SET)
4540     return false;
4541 
4542   src = SET_SRC (pattern);
4543 
4544   return (GET_CODE (src) == CONST
4545 	  && GET_CODE (XEXP (src, 0)) == UNSPEC
4546 	  && XINT (XEXP (src, 0), 1) == UNSPEC_HW1_LAST_PCREL);
4547 }
4548 
4549 
/* Do the first replacement step in tilegx_fixup_pcrel_references.
   INSN matches match_pcrel_step1 (the "moveli hw1_last(x-.L_PICLNK)"
   instruction described before tilegx_fixup_pcrel_references).  */
static void
replace_mov_pcrel_step1 (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  rtx unspec;
  rtx opnds[2];
  rtx_insn *new_insns;

  /* Destructure the SET (CONST (UNSPEC_HW1_LAST_PCREL [sym])) shape
     that match_pcrel_step1 established.  */
  gcc_assert (GET_CODE (pattern) == SET);
  opnds[0] = SET_DEST (pattern);

  gcc_assert (GET_CODE (SET_SRC (pattern)) == CONST);

  unspec = XEXP (SET_SRC (pattern), 0);
  gcc_assert (GET_CODE (unspec) == UNSPEC);
  gcc_assert (XINT (unspec, 1) == UNSPEC_HW1_LAST_PCREL);
  opnds[1] = XVECEXP (unspec, 0, 0);

  /* We only need to replace SYMBOL_REFs, not LABEL_REFs.  */
  if (GET_CODE (opnds[1]) != SYMBOL_REF)
    return;

  start_sequence ();

  /* For -fpic (flag_pic == 1) the shorter GOT sequence needs no first
     step, so nothing is emitted and replace_insns simply deletes the
     original instruction.  */
  if (flag_pic != 1)
    {
      if (TARGET_32BIT)
	emit_insn (gen_mov_got32_step1_32bit (opnds[0], opnds[1]));
      else
	emit_insn (gen_mov_got32_step1 (opnds[0], opnds[1]));
    }

  new_insns = get_insns ();
  end_sequence ();

  replace_insns (insn, new_insns);
}
4588 
4589 
4590 /* Returns true if INSN is the second instruction of a pc-relative
4591    address compuatation.  */
4592 static bool
match_pcrel_step2(rtx_insn * insn)4593 match_pcrel_step2 (rtx_insn *insn)
4594 {
4595   rtx unspec;
4596   rtx addr;
4597 
4598   if (TARGET_32BIT)
4599     {
4600       if (recog_memoized (insn) != CODE_FOR_insn_addr_shl16insli_32bit)
4601 	return false;
4602     }
4603   else
4604     {
4605       if (recog_memoized (insn) != CODE_FOR_insn_addr_shl16insli)
4606 	return false;
4607     }
4608 
4609   unspec = SET_SRC (PATTERN (insn));
4610   addr = XVECEXP (unspec, 0, 1);
4611 
4612   return (GET_CODE (addr) == CONST
4613 	  && GET_CODE (XEXP (addr, 0)) == UNSPEC
4614 	  && XINT (XEXP (addr, 0), 1) == UNSPEC_HW0_PCREL);
4615 }
4616 
4617 
/* Do the second replacement step in tilegx_fixup_pcrel_references.
   INSN matches match_pcrel_step2 (the "shl16insli ... hw0(x-.L_PICLNK)"
   instruction described before tilegx_fixup_pcrel_references).  */
static void
replace_mov_pcrel_step2 (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  rtx unspec;
  rtx addr;
  rtx opnds[3];
  rtx_insn *new_insns;
  rtx got_rtx = tilegx_got_rtx ();

  /* Destructure SET (UNSPEC_INSN_ADDR_SHL16INSLI [src, addr]) where
     ADDR is CONST (UNSPEC_HW0_PCREL [sym]).  */
  gcc_assert (GET_CODE (pattern) == SET);
  opnds[0] = SET_DEST (pattern);

  unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);
  gcc_assert (XINT (unspec, 1) == UNSPEC_INSN_ADDR_SHL16INSLI);

  opnds[1] = XVECEXP (unspec, 0, 0);

  addr = XVECEXP (unspec, 0, 1);
  gcc_assert (GET_CODE (addr) == CONST);

  unspec = XEXP (addr, 0);
  gcc_assert (GET_CODE (unspec) == UNSPEC);
  gcc_assert (XINT (unspec, 1) == UNSPEC_HW0_PCREL);
  opnds[2] = XVECEXP (unspec, 0, 0);

  /* We only need to replace SYMBOL_REFs, not LABEL_REFs.  */
  if (GET_CODE (opnds[2]) != SYMBOL_REF)
    return;

  start_sequence ();

  if (flag_pic == 1)
    {
      /* -fpic: a single add of a 16-bit GOT offset to the GOT
	 register replaces the whole pc-relative computation.  */
      if (TARGET_32BIT)
	emit_insn (gen_add_got16_32bit (opnds[0], got_rtx, opnds[2]));
      else
	emit_insn (gen_add_got16 (opnds[0], got_rtx, opnds[2]));
    }
  else
    {
      /* -fPIC: keep a two-instruction form, but using GOT relocations
	 (the first step was rewritten by replace_mov_pcrel_step1).  */
      if (TARGET_32BIT)
	emit_insn (gen_mov_got32_step2_32bit
		   (opnds[0], opnds[1], opnds[2]));
      else
	emit_insn (gen_mov_got32_step2 (opnds[0], opnds[1], opnds[2]));
    }

  new_insns = get_insns ();
  end_sequence ();

  replace_insns (insn, new_insns);
}
4673 
4674 
/* Do the third replacement step in tilegx_fixup_pcrel_references.
   INSN is the final "add<x> tmp3, txt_label_reg, tmp2" of the
   pc-relative sequence (an UNSPEC_MOV_PCREL_STEP3); replace it with a
   GOT-based address computation plus a load.  */
static void
replace_mov_pcrel_step3 (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  rtx unspec;
  rtx opnds[4];
  rtx_insn *new_insns;
  rtx got_rtx = tilegx_got_rtx ();
  rtx text_label_rtx = tilegx_text_label_rtx ();

  gcc_assert (GET_CODE (pattern) == SET);
  opnds[0] = SET_DEST (pattern);

  unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);
  gcc_assert (XINT (unspec, 1) == UNSPEC_MOV_PCREL_STEP3);

  opnds[1] = got_rtx;

  /* The UNSPEC's first two operands are the text-label register and
     the partial address, in either order; pick out the one that is
     not the text label.  */
  if (XVECEXP (unspec, 0, 0) == text_label_rtx)
    opnds[2] = XVECEXP (unspec, 0, 1);
  else
    {
      gcc_assert (XVECEXP (unspec, 0, 1) == text_label_rtx);
      opnds[2] = XVECEXP (unspec, 0, 0);
    }

  opnds[3] = XVECEXP (unspec, 0, 2);

  /* We only need to replace SYMBOL_REFs, not LABEL_REFs.  */
  if (GET_CODE (opnds[3]) != SYMBOL_REF)
    return;

  start_sequence ();

  if (flag_pic == 1)
    {
      /* -fpic: opnds[2] already holds the full GOT-slot address (see
	 replace_mov_pcrel_step2); just load through it.  */
      emit_move_insn (opnds[0], gen_const_mem (Pmode, opnds[2]));
    }
  else
    {
      /* -fPIC: add the GOT base first, then load through the slot.  */
      emit_move_insn (opnds[0], gen_rtx_PLUS (Pmode, opnds[1], opnds[2]));
      emit_move_insn (opnds[0], gen_const_mem (Pmode, opnds[0]));
    }

  new_insns = get_insns ();
  end_sequence ();

  replace_insns (insn, new_insns);
}
4726 
4727 
4728 /* We generate PC relative SYMBOL_REFs as an optimization, to avoid
4729    going through the GOT when the symbol is local to the compilation
4730    unit.  But such a symbol requires that the common text_label that
4731    we generate at the beginning of the function be in the same section
4732    as the reference to the SYMBOL_REF.  This may not be true if we
4733    generate hot/cold sections.  This function looks for such cases and
4734    replaces such references with the longer sequence going through the
4735    GOT.
4736 
4737    We expect following instruction sequence:
4738    moveli      tmp1, hw1_last(x-.L_PICLNK)          [1]
4739    shl16insli  tmp2, tmp1, hw0(x-.L_PICLNK)         [2]
4740    add<x>      tmp3, txt_label_reg, tmp2            [3]
4741 
4742    If we're compiling -fpic, we replace with the following sequence
4743    (the numbers in brackets match the instructions they're replacing
4744    above).
4745 
4746    add<x>li    tmp2, got_reg, hw0_last_got(x)       [2]
4747    ld<4>       tmp3, tmp2                           [3]
4748 
4749    If we're compiling -fPIC, we replace the first instruction with:
4750 
4751    moveli      tmp1, hw1_last_got(x)                [1]
4752    shl16insli  tmp2, tmp1, hw0_got(x)               [2]
4753    add<x>      tmp3, got_reg, tmp2                  [3]
4754    ld<4>       tmp3, tmp3                           [3]
4755 
4756    Note that we're careful to disturb the instruction sequence as
4757    little as possible, since it's very late in the compilation
4758    process.  */
4759 static void
tilegx_fixup_pcrel_references(void)4760 tilegx_fixup_pcrel_references (void)
4761 {
4762   rtx_insn *insn, *next_insn;
4763   bool same_section_as_entry = true;
4764 
4765   for (insn = get_insns (); insn; insn = next_insn)
4766     {
4767       next_insn = NEXT_INSN (insn);
4768 
4769       if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4770 	{
4771 	  same_section_as_entry = !same_section_as_entry;
4772 	  continue;
4773 	}
4774 
4775       if (same_section_as_entry)
4776 	continue;
4777 
4778       if (!(INSN_P (insn)
4779 	    && GET_CODE (PATTERN (insn)) != USE
4780 	    && GET_CODE (PATTERN (insn)) != CLOBBER))
4781 	continue;
4782 
4783       if (TARGET_32BIT)
4784 	{
4785 	  if (match_pcrel_step1 (insn))
4786 	    replace_mov_pcrel_step1 (insn);
4787 	  else if (match_pcrel_step2 (insn))
4788 	    replace_mov_pcrel_step2 (insn);
4789 	  else if (recog_memoized (insn) == CODE_FOR_mov_pcrel_step3_32bit)
4790 	    replace_mov_pcrel_step3 (insn);
4791 	}
4792       else
4793 	{
4794 	  if (match_pcrel_step1 (insn))
4795 	    replace_mov_pcrel_step1 (insn);
4796 	  else if (match_pcrel_step2 (insn))
4797 	    replace_mov_pcrel_step2 (insn);
4798 	  else if (recog_memoized (insn) == CODE_FOR_mov_pcrel_step3)
4799 	    replace_mov_pcrel_step3 (insn);
4800 	}
4801     }
4802 }
4803 
4804 
/* Ensure that no var tracking notes are emitted in the middle of a
   three-instruction bundle.  */
static void
reorder_var_tracking_notes (void)
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
  {
    rtx_insn *insn, *next;
    /* Stack of var-location notes pulled out of the current bundle,
       linked through their PREV_INSN fields; re-emitted after the
       bundle's terminating insn.  */
    rtx_insn *queue = NULL;
    bool in_bundle = false;

    for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = next)
      {
	next = NEXT_INSN (insn);

	if (INSN_P (insn))
	  {
	    /* Emit queued up notes at the last instruction of a
	       bundle.  QImode marks a bundle's final insn and SImode
	       an interior one (see tilegx_gen_bundles).  */
	    if (GET_MODE (insn) == QImode)
	      {
		/* Splice each queued note back into the chain
		   immediately after INSN.  */
		while (queue)
		  {
		    rtx_insn *next_queue = PREV_INSN (queue);
		    SET_PREV_INSN (NEXT_INSN (insn)) = queue;
		    SET_NEXT_INSN (queue) = NEXT_INSN (insn);
		    SET_NEXT_INSN (insn) = queue;
		    SET_PREV_INSN (queue) = insn;
		    queue = next_queue;
		  }
		in_bundle = false;
	      }
	    else if (GET_MODE (insn) == SImode)
	      in_bundle = true;
	  }
	else if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
	  {
	    if (in_bundle)
	      {
		/* Unlink the note from the insn chain and push it on
		   the queue.  */
		rtx_insn *prev = PREV_INSN (insn);
		SET_PREV_INSN (next) = prev;
		SET_NEXT_INSN (prev) = next;

		SET_PREV_INSN (insn) = queue;
		queue = insn;
	      }
	  }
      }
  }
}
4856 
4857 
4858 /* Perform machine dependent operations on the rtl chain INSNS.  */
4859 static void
tilegx_reorg(void)4860 tilegx_reorg (void)
4861 {
4862   /* We are freeing block_for_insn in the toplev to keep compatibility
4863      with old MDEP_REORGS that are not CFG based.  Recompute it
4864      now.  */
4865   compute_bb_for_insn ();
4866 
4867   if (flag_reorder_blocks_and_partition)
4868     {
4869       tilegx_fixup_pcrel_references ();
4870     }
4871 
4872   if (flag_schedule_insns_after_reload)
4873     {
4874       split_all_insns ();
4875 
4876       timevar_push (TV_SCHED2);
4877       schedule_insns ();
4878       timevar_pop (TV_SCHED2);
4879 
4880       /* Examine the schedule to group into bundles.  */
4881       tilegx_gen_bundles ();
4882     }
4883 
4884   df_analyze ();
4885 
4886   if (flag_var_tracking)
4887     {
4888       timevar_push (TV_VAR_TRACKING);
4889       variable_tracking_main ();
4890       reorder_var_tracking_notes ();
4891       timevar_pop (TV_VAR_TRACKING);
4892     }
4893 
4894   df_finish_pass (false);
4895 }
4896 
4897 
4898 
4899 /* Assembly  */
4900 
4901 /* Select a format to encode pointers in exception handling data.
4902    CODE is 0 for data, 1 for code labels, 2 for function pointers.
4903    GLOBAL is true if the symbol may be affected by dynamic
4904    relocations.  */
4905 int
tilegx_asm_preferred_eh_data_format(int code ATTRIBUTE_UNUSED,int global)4906 tilegx_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
4907 {
4908   int type = TARGET_32BIT ? DW_EH_PE_sdata4 : DW_EH_PE_sdata8;
4909   return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
4910 }
4911 
4912 
4913 /* Implement TARGET_ASM_OUTPUT_MI_THUNK.  */
4914 static void
tilegx_output_mi_thunk(FILE * file,tree thunk_fndecl ATTRIBUTE_UNUSED,HOST_WIDE_INT delta,HOST_WIDE_INT vcall_offset,tree function)4915 tilegx_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
4916 			HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
4917 			tree function)
4918 {
4919   rtx this_rtx, funexp, addend;
4920   rtx_insn *insn;
4921 
4922   /* Pretend to be a post-reload pass while generating rtl.  */
4923   reload_completed = 1;
4924 
4925   /* Mark the end of the (empty) prologue.  */
4926   emit_note (NOTE_INSN_PROLOGUE_END);
4927 
4928   /* Find the "this" pointer.  If the function returns a structure,
4929      the structure return pointer is in $1.  */
4930   if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
4931     this_rtx = gen_rtx_REG (Pmode, 1);
4932   else
4933     this_rtx = gen_rtx_REG (Pmode, 0);
4934 
4935   /* Add DELTA to THIS_RTX.  */
4936   if (!(delta >= -32868 && delta <= 32767))
4937     {
4938       addend = gen_rtx_REG (Pmode, 29);
4939       emit_move_insn (addend, GEN_INT (delta));
4940     }
4941   else
4942     addend = GEN_INT (delta);
4943 
4944   if (TARGET_32BIT)
4945     emit_insn (gen_addsi3 (this_rtx, this_rtx, addend));
4946   else
4947     emit_insn (gen_adddi3 (this_rtx, this_rtx, addend));
4948 
4949   /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
4950   if (vcall_offset)
4951     {
4952       rtx tmp;
4953 
4954       tmp = gen_rtx_REG (Pmode, 29);
4955       emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
4956 
4957       if (!(vcall_offset >= -32868 && vcall_offset <= 32767))
4958 	{
4959 	  addend = gen_rtx_REG (Pmode, 28);
4960 	  emit_move_insn (addend, GEN_INT (vcall_offset));
4961 	}
4962       else
4963 	addend = GEN_INT (vcall_offset);
4964 
4965       if (TARGET_32BIT)
4966 	emit_insn (gen_addsi3 (tmp, tmp, addend));
4967       else
4968 	emit_insn (gen_adddi3 (tmp, tmp, addend));
4969 
4970       emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
4971 
4972       if (TARGET_32BIT)
4973 	emit_insn (gen_addsi3 (this_rtx, this_rtx, tmp));
4974       else
4975 	emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
4976     }
4977 
4978   /* Generate a tail call to the target function.  */
4979   if (!TREE_USED (function))
4980     {
4981       assemble_external (function);
4982       TREE_USED (function) = 1;
4983     }
4984   funexp = XEXP (DECL_RTL (function), 0);
4985   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
4986   insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
4987   SIBLING_CALL_P (insn) = 1;
4988 
4989   /* Run just enough of rest_of_compilation to get the insns emitted.
4990      There's not really enough bulk here to make other passes such as
4991      instruction scheduling worth while.  Note that use_thunk calls
4992      assemble_start_function and assemble_end_function.
4993 
4994      We don't currently bundle, but the instruciton sequence is all
4995      serial except for the tail call, so we're only wasting one cycle.
4996    */
4997   insn = get_insns ();
4998   shorten_branches (insn);
4999   final_start_function (insn, file, 1);
5000   final (insn, file, 1);
5001   final_end_function ();
5002 
5003   /* Stop pretending to be a post-reload pass.  */
5004   reload_completed = 0;
5005 }
5006 
5007 
5008 /* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.  */
5009 static void
tilegx_asm_trampoline_template(FILE * file)5010 tilegx_asm_trampoline_template (FILE *file)
5011 {
5012   int ptr_mode_size = GET_MODE_SIZE (ptr_mode);
5013   if (TARGET_32BIT)
5014     {
5015       fprintf (file, "\tlnk      r10\n");
5016       fprintf (file, "\taddxi    r10, r10, 32\n");
5017       fprintf (file, "\tld4s_add r11, r10, %d\n", ptr_mode_size);
5018       fprintf (file, "\tld4s     r10, r10\n");
5019       fprintf (file, "\tjr       r11\n");
5020       fprintf (file, "\t.word 0 # <function address>\n");
5021       fprintf (file, "\t.word 0 # <static chain value>\n");
5022     }
5023   else
5024     {
5025       fprintf (file, "\tlnk      r10\n");
5026       fprintf (file, "\taddi     r10, r10, 32\n");
5027       fprintf (file, "\tld_add   r11, r10, %d\n", ptr_mode_size);
5028       fprintf (file, "\tld       r10, r10\n");
5029       fprintf (file, "\tjr       r11\n");
5030       fprintf (file, "\t.quad 0 # <function address>\n");
5031       fprintf (file, "\t.quad 0 # <static chain value>\n");
5032     }
5033 }
5034 
5035 
5036 /* Implement TARGET_TRAMPOLINE_INIT.  */
5037 static void
tilegx_trampoline_init(rtx m_tramp,tree fndecl,rtx static_chain)5038 tilegx_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
5039 {
5040   rtx fnaddr, chaddr;
5041   rtx mem;
5042   rtx begin_addr, end_addr;
5043   int ptr_mode_size = GET_MODE_SIZE (ptr_mode);
5044 
5045   fnaddr = copy_to_reg (XEXP (DECL_RTL (fndecl), 0));
5046   chaddr = copy_to_reg (static_chain);
5047 
5048   emit_block_move (m_tramp, assemble_trampoline_template (),
5049 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
5050 
5051   mem = adjust_address (m_tramp, ptr_mode,
5052 			TRAMPOLINE_SIZE - 2 * ptr_mode_size);
5053   emit_move_insn (mem, fnaddr);
5054   mem = adjust_address (m_tramp, ptr_mode,
5055 			TRAMPOLINE_SIZE - ptr_mode_size);
5056   emit_move_insn (mem, chaddr);
5057 
5058   /* Get pointers to the beginning and end of the code block.  */
5059   begin_addr = force_reg (Pmode, XEXP (m_tramp, 0));
5060   end_addr = force_reg (Pmode, plus_constant (Pmode, XEXP (m_tramp, 0),
5061 					      TRAMPOLINE_SIZE));
5062 
5063   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
5064 		     LCT_NORMAL, VOIDmode, 2, begin_addr, Pmode,
5065 		     end_addr, Pmode);
5066 }
5067 
5068 
5069 /* Implement TARGET_PRINT_OPERAND.  */
5070 static void
tilegx_print_operand(FILE * file,rtx x,int code)5071 tilegx_print_operand (FILE *file, rtx x, int code)
5072 {
5073   switch (code)
5074     {
5075     case 'c':
5076       /* Print the compare operator opcode for conditional moves.  */
5077       switch (GET_CODE (x))
5078 	{
5079 	case EQ:
5080 	  fputs ("z", file);
5081 	  break;
5082 	case NE:
5083 	  fputs ("nz", file);
5084 	  break;
5085 	default:
5086 	  output_operand_lossage ("invalid %%c operand");
5087 	}
5088       return;
5089 
5090     case 'C':
5091       /* Print the compare operator opcode for conditional moves.  */
5092       switch (GET_CODE (x))
5093 	{
5094 	case EQ:
5095 	  fputs ("nz", file);
5096 	  break;
5097 	case NE:
5098 	  fputs ("z", file);
5099 	  break;
5100 	default:
5101 	  output_operand_lossage ("invalid %%C operand");
5102 	}
5103       return;
5104 
5105     case 'd':
5106       {
5107 	/* Print the compare operator opcode for conditional moves.  */
5108 	switch (GET_CODE (x))
5109 	  {
5110 	  case EQ:
5111 	    fputs ("eq", file);
5112 	    break;
5113 	  case NE:
5114 	    fputs ("ne", file);
5115 	    break;
5116 	  default:
5117 	    output_operand_lossage ("invalid %%d operand");
5118 	  }
5119 	return;
5120       }
5121 
5122     case 'D':
5123       {
5124 	/* Print the compare operator opcode for conditional moves.  */
5125 	switch (GET_CODE (x))
5126 	  {
5127 	  case EQ:
5128 	    fputs ("ne", file);
5129 	    break;
5130 	  case NE:
5131 	    fputs ("eq", file);
5132 	    break;
5133 	  default:
5134 	    output_operand_lossage ("invalid %%D operand");
5135 	  }
5136 	return;
5137       }
5138 
5139     case 'H':
5140       {
5141       if (GET_CODE (x) == CONST
5142 	  && GET_CODE (XEXP (x, 0)) == UNSPEC)
5143 	{
5144 	  rtx addr = XVECEXP (XEXP (x, 0), 0, 0);
5145 	  int unspec = XINT (XEXP (x, 0), 1);
5146 	  const char *opstr = NULL;
5147 	  switch (unspec)
5148 	    {
5149 	    case UNSPEC_HW0:
5150 	    case UNSPEC_HW0_PCREL:
5151 	      opstr = "hw0";
5152 	      break;
5153 	    case UNSPEC_HW1:
5154 	    case UNSPEC_HW1_PCREL:
5155 	      opstr = "hw1";
5156 	      break;
5157 	    case UNSPEC_HW2:
5158 	      opstr = "hw2";
5159 	      break;
5160 	    case UNSPEC_HW3:
5161 	      opstr = "hw3";
5162 	      break;
5163 	    case UNSPEC_HW0_LAST:
5164 	      opstr = "hw0_last";
5165 	      break;
5166 	    case UNSPEC_HW1_LAST:
5167 	    case UNSPEC_HW1_LAST_PCREL:
5168 	      opstr = "hw1_last";
5169 	      break;
5170 	    case UNSPEC_HW2_LAST:
5171 	    case UNSPEC_HW2_LAST_PCREL:
5172 	      opstr = "hw2_last";
5173 	      break;
5174 	    case UNSPEC_HW0_GOT:
5175 	      opstr = "hw0_got";
5176 	      break;
5177 	    case UNSPEC_HW0_LAST_GOT:
5178 	      opstr = "hw0_last_got";
5179 	      break;
5180 	    case UNSPEC_HW1_LAST_GOT:
5181 	      opstr = "hw1_last_got";
5182 	      break;
5183 	    case UNSPEC_HW0_TLS_GD:
5184 	      opstr = "hw0_tls_gd";
5185 	      break;
5186 	    case UNSPEC_HW1_LAST_TLS_GD:
5187 	      opstr = "hw1_last_tls_gd";
5188 	      break;
5189 	    case UNSPEC_HW0_TLS_IE:
5190 	      opstr = "hw0_tls_ie";
5191 	      break;
5192 	    case UNSPEC_HW1_LAST_TLS_IE:
5193 	      opstr = "hw1_last_tls_ie";
5194 	      break;
5195 	    case UNSPEC_HW0_TLS_LE:
5196 	      opstr = "hw0_tls_le";
5197 	      break;
5198 	    case UNSPEC_HW1_LAST_TLS_LE:
5199 	      opstr = "hw1_last_tls_le";
5200 	      break;
5201 	    case UNSPEC_HW0_PLT_PCREL:
5202 	      opstr = "hw0_plt";
5203 	      break;
5204 	    case UNSPEC_HW1_PLT_PCREL:
5205 	      opstr = "hw1_plt";
5206 	      break;
5207 	    case UNSPEC_HW1_LAST_PLT_PCREL:
5208 	      opstr = "hw1_last_plt";
5209 	      break;
5210 	    case UNSPEC_HW2_LAST_PLT_PCREL:
5211 	      opstr = "hw2_last_plt";
5212 	      break;
5213 	    default:
5214 	      output_operand_lossage ("invalid %%H specifier");
5215 	    }
5216 
5217 	  fputs (opstr, file);
5218 	  fputc ('(', file);
5219 	  output_addr_const (file, addr);
5220 
5221 	  if (unspec == UNSPEC_HW0_PCREL
5222 	      || unspec == UNSPEC_HW1_PCREL
5223 	      || unspec == UNSPEC_HW1_LAST_PCREL
5224 	      || unspec == UNSPEC_HW2_LAST_PCREL
5225 	      || unspec == UNSPEC_HW0_PLT_PCREL
5226 	      || unspec == UNSPEC_HW1_PLT_PCREL
5227 	      || unspec == UNSPEC_HW1_LAST_PLT_PCREL
5228 	      || unspec == UNSPEC_HW2_LAST_PLT_PCREL)
5229 	    {
5230 	      rtx addr2 = XVECEXP (XEXP (x, 0), 0, 1);
5231 	      fputs (" - " , file);
5232 	      output_addr_const (file, addr2);
5233 	    }
5234 
5235 	  fputc (')', file);
5236 	  return;
5237 	}
5238       else if (symbolic_operand (x, VOIDmode))
5239 	{
5240 	  output_addr_const (file, x);
5241 	  return;
5242 	}
5243       }
5244       /* FALLTHRU */
5245 
5246     case 'h':
5247       {
5248 	/* Print the low 16 bits of a constant.  */
5249 	HOST_WIDE_INT i;
5250 	if (CONST_INT_P (x))
5251 	  i = INTVAL (x);
5252 	else if (GET_CODE (x) == CONST_DOUBLE)
5253 	  i = CONST_DOUBLE_LOW (x);
5254 	else
5255 	  {
5256 	    output_operand_lossage ("invalid %%h operand");
5257 	    return;
5258 	  }
5259 	i = trunc_int_for_mode (i, HImode);
5260 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
5261 	return;
5262       }
5263 
5264     case 'I':
5265       /* Print an auto-inc memory operand.  */
5266       if (!MEM_P (x))
5267 	{
5268 	  output_operand_lossage ("invalid %%I operand");
5269 	  return;
5270 	}
5271 
5272       output_memory_autoinc_first = true;
5273       output_address (GET_MODE (x), XEXP (x, 0));
5274       return;
5275 
5276     case 'i':
5277       /* Print an auto-inc memory operand.  */
5278       if (!MEM_P (x))
5279 	{
5280 	  output_operand_lossage ("invalid %%i operand");
5281 	  return;
5282 	}
5283 
5284       output_memory_autoinc_first = false;
5285       output_address (GET_MODE (x), XEXP (x, 0));
5286       return;
5287 
5288     case 'j':
5289       {
5290 	/* Print the low 8 bits of a constant.  */
5291 	HOST_WIDE_INT i;
5292 	if (CONST_INT_P (x))
5293 	  i = INTVAL (x);
5294 	else if (GET_CODE (x) == CONST_DOUBLE)
5295 	  i = CONST_DOUBLE_LOW (x);
5296 	else if (GET_CODE (x) == CONST_VECTOR
5297 		 && CONST_INT_P (CONST_VECTOR_ELT (x, 0)))
5298 	  i = INTVAL (CONST_VECTOR_ELT (x, 0));
5299 	else
5300 	  {
5301 	    output_operand_lossage ("invalid %%j operand");
5302 	    return;
5303 	  }
5304 	i = trunc_int_for_mode (i, QImode);
5305 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
5306 	return;
5307       }
5308 
5309     case 'P':
5310       {
5311 	/* Print a constant plus one.  */
5312 	if (!CONST_INT_P (x))
5313 	  {
5314 	    output_operand_lossage ("invalid %%P operand");
5315 	    return;
5316 	  }
5317 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) + 1);
5318 	return;
5319       }
5320 
5321     case 'm':
5322     case 'M':
5323       {
5324 	/* Print a bfextu-style bit range.  */
5325 	int first_bit, last_bit;
5326 	HOST_WIDE_INT flip = (code == 'm') ? ~0 : 0;
5327 
5328 	if (!CONST_INT_P (x)
5329 	    || !tilegx_bitfield_operand_p (INTVAL (x) ^ flip,
5330 					   &first_bit, &last_bit))
5331 	  {
5332 	    output_operand_lossage ("invalid %%%c operand", code);
5333 	    return;
5334 	  }
5335 
5336 	fprintf (file, "%d, %d", first_bit, last_bit);
5337 	return;
5338       }
5339 
5340     case 'N':
5341       {
5342 	const char *reg = NULL;
5343 
5344 	/* Print a network register.  */
5345 	if (!CONST_INT_P (x))
5346 	  {
5347 	    output_operand_lossage ("invalid %%N operand");
5348 	    return;
5349 	  }
5350 
5351 	switch (INTVAL (x))
5352 	  {
5353 	  case TILEGX_NETREG_IDN0: reg = "idn0"; break;
5354 	  case TILEGX_NETREG_IDN1: reg = "idn1"; break;
5355 	  case TILEGX_NETREG_UDN0: reg = "udn0"; break;
5356 	  case TILEGX_NETREG_UDN1: reg = "udn1"; break;
5357 	  case TILEGX_NETREG_UDN2: reg = "udn2"; break;
5358 	  case TILEGX_NETREG_UDN3: reg = "udn3"; break;
5359 	  default:
5360 	    gcc_unreachable ();
5361 	  }
5362 
5363 	fprintf (file, reg);
5364 	return;
5365       }
5366 
5367     case 'p':
5368       if (GET_CODE (x) == SYMBOL_REF)
5369 	{
5370 	  if (flag_pic && !SYMBOL_REF_LOCAL_P (x))
5371 	    fprintf (file, "plt(");
5372 	  output_addr_const (file, x);
5373 	  if (flag_pic && !SYMBOL_REF_LOCAL_P (x))
5374 	    fprintf (file, ")");
5375 	}
5376       else
5377 	output_addr_const (file, x);
5378       return;
5379 
5380     case 'r':
5381       /* In this case we need a register.  Use 'zero' if the operand
5382 	 is const0_rtx.  */
5383       if (x == const0_rtx
5384 	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
5385 	{
5386 	  fputs ("zero", file);
5387 	  return;
5388 	}
5389       else if (!REG_P (x))
5390 	{
5391 	  output_operand_lossage ("invalid operand for 'r' specifier");
5392 	  return;
5393 	}
5394       /* FALLTHRU */
5395 
5396     case 0:
5397       if (REG_P (x))
5398 	{
5399 	  fprintf (file, "%s", reg_names[REGNO (x)]);
5400 	  return;
5401 	}
5402       else if (MEM_P (x))
5403 	{
5404 	  output_address (VOIDmode, XEXP (x, 0));
5405 	  return;
5406 	}
5407       else
5408 	{
5409 	  output_addr_const (file, x);
5410 	  return;
5411 	}
5412     }
5413 
5414   debug_rtx (x);
5415   output_operand_lossage ("unable to print out operand yet; code == %d (%c)",
5416 			  code, code);
5417 }
5418 
5419 
5420 /* Implement TARGET_PRINT_OPERAND_ADDRESS.  */
5421 static void
tilegx_print_operand_address(FILE * file,machine_mode mode,rtx addr)5422 tilegx_print_operand_address (FILE *file, machine_mode mode, rtx addr)
5423 {
5424   if (GET_CODE (addr) == POST_DEC
5425       || GET_CODE (addr) == POST_INC)
5426     {
5427       int offset = GET_MODE_SIZE (mode);
5428 
5429       gcc_assert (mode != VOIDmode);
5430 
5431       if (output_memory_autoinc_first)
5432 	fprintf (file, "%s", reg_names[REGNO (XEXP (addr, 0))]);
5433       else
5434 	fprintf (file, "%d",
5435 		 GET_CODE (addr) == POST_DEC ? -offset : offset);
5436     }
5437   else if (GET_CODE (addr) == POST_MODIFY)
5438     {
5439       gcc_assert (mode != VOIDmode);
5440 
5441       gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS);
5442 
5443       if (output_memory_autoinc_first)
5444 	fprintf (file, "%s", reg_names[REGNO (XEXP (addr, 0))]);
5445       else
5446 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5447 		 INTVAL (XEXP (XEXP (addr, 1), 1)));
5448     }
5449   else
5450     tilegx_print_operand (file, addr, 'r');
5451 }
5452 
5453 
/* Machine mode of current insn, for determining curly brace
   placement.  In this backend the insn's mode encodes bundling:
   SImode starts a bundle and QImode ends one (see
   tilegx_asm_output_opcode).  */
static machine_mode insn_mode;


/* Implement FINAL_PRESCAN_INSN.  This is used to emit bundles.
   Called by final before each insn is output.  */
void
tilegx_final_prescan_insn (rtx_insn *insn)
{
  /* Record this for tilegx_asm_output_opcode to examine.  */
  insn_mode = GET_MODE (insn);
}
5466 
5467 
5468 /* While emitting asm, are we currently inside '{' for a bundle?  */
5469 static bool tilegx_in_bundle = false;
5470 
5471 /* Implement ASM_OUTPUT_OPCODE.  Prepend/append curly braces as
5472    appropriate given the bundling information recorded by
5473    tilegx_gen_bundles.  */
5474 const char *
tilegx_asm_output_opcode(FILE * stream,const char * code)5475 tilegx_asm_output_opcode (FILE *stream, const char *code)
5476 {
5477   bool pseudo = !strcmp (code, "pseudo");
5478 
5479   if (!tilegx_in_bundle && insn_mode == SImode)
5480     {
5481       /* Start a new bundle.  */
5482       fprintf (stream, "{\n\t");
5483       tilegx_in_bundle = true;
5484     }
5485 
5486   if (tilegx_in_bundle && insn_mode == QImode)
5487     {
5488       /* Close an existing bundle.  */
5489       static char buf[100];
5490 
5491       gcc_assert (strlen (code) + 3 + 1 < sizeof (buf));
5492 
5493       strcpy (buf, pseudo ? "" : code);
5494       strcat (buf, "\n\t}");
5495       tilegx_in_bundle = false;
5496 
5497       return buf;
5498     }
5499   else
5500     {
5501       return pseudo ? "" : code;
5502     }
5503 }
5504 
5505 
5506 /* Output assembler code to FILE to increment profiler label # LABELNO
5507    for profiling a function entry.  */
5508 void
tilegx_function_profiler(FILE * file,int labelno ATTRIBUTE_UNUSED)5509 tilegx_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
5510 {
5511   if (tilegx_in_bundle)
5512     {
5513       fprintf (file, "\t}\n");
5514     }
5515 
5516   if (flag_pic)
5517     {
5518       fprintf (file,
5519 	       "\t{\n"
5520 	       "\tmove\tr10, lr\n"
5521 	       "\tjal\tplt(%s)\n"
5522 	       "\t}\n", MCOUNT_NAME);
5523     }
5524   else
5525     {
5526       fprintf (file,
5527 	       "\t{\n"
5528 	       "\tmove\tr10, lr\n"
5529 	       "\tjal\t%s\n"
5530 	       "\t}\n", MCOUNT_NAME);
5531     }
5532 
5533   tilegx_in_bundle = false;
5534 }
5535 
5536 
/* Implement TARGET_ASM_FILE_END.  Emit the end-of-file marker
   indicating a non-executable stack (e.g. a .note.GNU-stack section)
   when the target configuration requests it.  */
static void
tilegx_file_end (void)
{
  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}
5544 
5545 
5546 
5547 #undef  TARGET_HAVE_TLS
5548 #define TARGET_HAVE_TLS HAVE_AS_TLS
5549 
5550 #undef  TARGET_OPTION_OVERRIDE
5551 #define TARGET_OPTION_OVERRIDE tilegx_option_override
5552 
5553 #undef  TARGET_SCALAR_MODE_SUPPORTED_P
5554 #define TARGET_SCALAR_MODE_SUPPORTED_P tilegx_scalar_mode_supported_p
5555 
5556 #undef  TARGET_VECTOR_MODE_SUPPORTED_P
5557 #define TARGET_VECTOR_MODE_SUPPORTED_P tilegx_vector_mode_supported_p
5558 
5559 #undef  TARGET_CANNOT_FORCE_CONST_MEM
5560 #define TARGET_CANNOT_FORCE_CONST_MEM tilegx_cannot_force_const_mem
5561 
5562 #undef  TARGET_FUNCTION_OK_FOR_SIBCALL
5563 #define TARGET_FUNCTION_OK_FOR_SIBCALL tilegx_function_ok_for_sibcall
5564 
5565 #undef  TARGET_PASS_BY_REFERENCE
5566 #define TARGET_PASS_BY_REFERENCE tilegx_pass_by_reference
5567 
5568 #undef  TARGET_RETURN_IN_MSB
5569 #define TARGET_RETURN_IN_MSB tilegx_return_in_msb
5570 
5571 #undef  TARGET_RETURN_IN_MEMORY
5572 #define TARGET_RETURN_IN_MEMORY tilegx_return_in_memory
5573 
5574 #undef  TARGET_MODE_REP_EXTENDED
5575 #define TARGET_MODE_REP_EXTENDED tilegx_mode_rep_extended
5576 
5577 #undef  TARGET_FUNCTION_ARG_BOUNDARY
5578 #define TARGET_FUNCTION_ARG_BOUNDARY tilegx_function_arg_boundary
5579 
5580 #undef  TARGET_FUNCTION_ARG
5581 #define TARGET_FUNCTION_ARG tilegx_function_arg
5582 
5583 #undef  TARGET_FUNCTION_ARG_ADVANCE
5584 #define TARGET_FUNCTION_ARG_ADVANCE tilegx_function_arg_advance
5585 
5586 #undef  TARGET_FUNCTION_VALUE
5587 #define TARGET_FUNCTION_VALUE tilegx_function_value
5588 
5589 #undef  TARGET_LIBCALL_VALUE
5590 #define TARGET_LIBCALL_VALUE tilegx_libcall_value
5591 
5592 #undef  TARGET_FUNCTION_VALUE_REGNO_P
5593 #define TARGET_FUNCTION_VALUE_REGNO_P tilegx_function_value_regno_p
5594 
5595 #undef  TARGET_PROMOTE_FUNCTION_MODE
5596 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
5597 
5598 #undef  TARGET_PROMOTE_PROTOTYPES
5599 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
5600 
5601 #undef  TARGET_BUILD_BUILTIN_VA_LIST
5602 #define TARGET_BUILD_BUILTIN_VA_LIST tilegx_build_builtin_va_list
5603 
5604 #undef  TARGET_EXPAND_BUILTIN_VA_START
5605 #define TARGET_EXPAND_BUILTIN_VA_START tilegx_va_start
5606 
5607 #undef  TARGET_SETUP_INCOMING_VARARGS
5608 #define TARGET_SETUP_INCOMING_VARARGS tilegx_setup_incoming_varargs
5609 
5610 #undef  TARGET_GIMPLIFY_VA_ARG_EXPR
5611 #define TARGET_GIMPLIFY_VA_ARG_EXPR tilegx_gimplify_va_arg_expr
5612 
5613 #undef  TARGET_RTX_COSTS
5614 #define TARGET_RTX_COSTS tilegx_rtx_costs
5615 
5616 #undef  TARGET_EXPAND_TO_RTL_HOOK
5617 #define TARGET_EXPAND_TO_RTL_HOOK tilegx_expand_to_rtl_hook
5618 
5619 #undef  TARGET_SHIFT_TRUNCATION_MASK
5620 #define TARGET_SHIFT_TRUNCATION_MASK tilegx_shift_truncation_mask
5621 
5622 #undef  TARGET_INIT_LIBFUNCS
5623 #define TARGET_INIT_LIBFUNCS tilegx_init_libfuncs
5624 
5625 /* Limit to what we can reach in one addli.  */
5626 #undef  TARGET_MIN_ANCHOR_OFFSET
5627 #define TARGET_MIN_ANCHOR_OFFSET -32768
5628 #undef  TARGET_MAX_ANCHOR_OFFSET
5629 #define TARGET_MAX_ANCHOR_OFFSET 32767
5630 
5631 #undef  TARGET_LEGITIMATE_CONSTANT_P
5632 #define TARGET_LEGITIMATE_CONSTANT_P tilegx_legitimate_constant_p
5633 
5634 #undef  TARGET_LEGITIMATE_ADDRESS_P
5635 #define TARGET_LEGITIMATE_ADDRESS_P tilegx_legitimate_address_p
5636 
5637 #undef  TARGET_LEGITIMIZE_ADDRESS
5638 #define TARGET_LEGITIMIZE_ADDRESS tilegx_legitimize_address
5639 
5640 #undef  TARGET_DELEGITIMIZE_ADDRESS
5641 #define TARGET_DELEGITIMIZE_ADDRESS tilegx_delegitimize_address
5642 
5643 #undef  TARGET_INIT_BUILTINS
5644 #define TARGET_INIT_BUILTINS  tilegx_init_builtins
5645 
5646 #undef  TARGET_BUILTIN_DECL
5647 #define TARGET_BUILTIN_DECL tilegx_builtin_decl
5648 
5649 #undef  TARGET_EXPAND_BUILTIN
5650 #define TARGET_EXPAND_BUILTIN tilegx_expand_builtin
5651 
5652 #undef  TARGET_CONDITIONAL_REGISTER_USAGE
5653 #define TARGET_CONDITIONAL_REGISTER_USAGE tilegx_conditional_register_usage
5654 
5655 #undef  TARGET_FRAME_POINTER_REQUIRED
5656 #define TARGET_FRAME_POINTER_REQUIRED tilegx_frame_pointer_required
5657 
5658 #undef  TARGET_DELAY_SCHED2
5659 #define TARGET_DELAY_SCHED2 true
5660 
5661 #undef  TARGET_DELAY_VARTRACK
5662 #define TARGET_DELAY_VARTRACK true
5663 
5664 #undef  TARGET_SCHED_ISSUE_RATE
5665 #define TARGET_SCHED_ISSUE_RATE tilegx_issue_rate
5666 
5667 #undef  TARGET_SCHED_ADJUST_COST
5668 #define TARGET_SCHED_ADJUST_COST tilegx_sched_adjust_cost
5669 
5670 #undef  TARGET_MACHINE_DEPENDENT_REORG
5671 #define TARGET_MACHINE_DEPENDENT_REORG tilegx_reorg
5672 
5673 #undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
5674 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
5675   hook_bool_const_tree_hwi_hwi_const_tree_true
5676 
5677 #undef  TARGET_ASM_OUTPUT_MI_THUNK
5678 #define TARGET_ASM_OUTPUT_MI_THUNK tilegx_output_mi_thunk
5679 
5680 #undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
5681 #define TARGET_ASM_TRAMPOLINE_TEMPLATE tilegx_asm_trampoline_template
5682 
5683 #undef  TARGET_TRAMPOLINE_INIT
5684 #define TARGET_TRAMPOLINE_INIT tilegx_trampoline_init
5685 
5686 #undef  TARGET_PRINT_OPERAND
5687 #define TARGET_PRINT_OPERAND tilegx_print_operand
5688 
5689 #undef  TARGET_PRINT_OPERAND_ADDRESS
5690 #define TARGET_PRINT_OPERAND_ADDRESS tilegx_print_operand_address
5691 
5692 #undef  TARGET_ASM_FILE_END
5693 #define TARGET_ASM_FILE_END tilegx_file_end
5694 
5695 #undef  TARGET_ASM_ALIGNED_DI_OP
5696 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
5697 
5698 #undef  TARGET_CAN_USE_DOLOOP_P
5699 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
5700 
/* Instantiate the target hook vector; TARGET_INITIALIZER picks up
   all of the TARGET_* overrides defined above.  */
struct gcc_target targetm = TARGET_INITIALIZER;
5702 
5703 #include "gt-tilegx.h"
5704