1 /* Target Definitions for NVPTX.
2    Copyright (C) 2014-2019 Free Software Foundation, Inc.
3    Contributed by Bernd Schmidt <bernds@codesourcery.com>
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published
9    by the Free Software Foundation; either version 3, or (at your
10    option) any later version.
11 
12    GCC is distributed in the hope that it will be useful, but WITHOUT
13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15    License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 #ifndef GCC_NVPTX_H
22 #define GCC_NVPTX_H
23 
24 #ifndef NVPTX_OPTS_H
25 #include "config/nvptx/nvptx-opts.h"
26 #endif
27 
/* Run-time Target.  */

/* Startup-file spec: names crt0.o under the mmainkernel option.  */
#define STARTFILE_SPEC "%{mmainkernel:crt0.o}"

/* Assembler spec: turns misa=<value> into "-m <value>".  */
#define ASM_SPEC "%{misa=*:-m %*}"

/* Register the preprocessor assertions and predefined macros for this
   target.  The machine/cpu assertions and __nvptx__ are unconditional;
   __nvptx_softstack__ and __nvptx_unisimt__ are added only when
   TARGET_SOFT_STACK resp. TARGET_UNIFORM_SIMT is true.  */
#define TARGET_CPU_CPP_BUILTINS()			\
  do							\
    {							\
      builtin_assert ("machine=nvptx");			\
      builtin_assert ("cpu=nvptx");			\
      builtin_define ("__nvptx__");			\
      if (TARGET_SOFT_STACK)				\
	{						\
	  builtin_define ("__nvptx_softstack__");	\
	}						\
      if (TARGET_UNIFORM_SIMT)				\
	{						\
	  builtin_define ("__nvptx_unisimt__");		\
	}						\
    }							\
  while (0)

/* The default in ../../gcc.c adds "-pthread", which nvptx does not
   support; override it with an empty spec.  */
#define GOMP_SELF_SPECS ""
49 
/* Storage Layout.  */

/* Bits, bytes and words are all little-endian.  */
#define BITS_BIG_ENDIAN 0
#define BYTES_BIG_ENDIAN 0
#define WORDS_BIG_ENDIAN 0

/* An 8-unit word, picked so that multi-word subregs never have to be
   dealt with.  */
#define UNITS_PER_WORD 8

/* Alignment requirements; every value below is in bits.  */
#define PARM_BOUNDARY 32
#define STACK_BOUNDARY 128
#define FUNCTION_BOUNDARY 32
#define BIGGEST_ALIGNMENT 128
#define STRICT_ALIGNMENT 1

/* Upper bound for stack alignment: 1024 bytes, expressed in bits.  */
#define MAX_STACK_ALIGNMENT (1024 * 8)

/* Data alignment is delegated to nvptx_data_alignment.  */
#define DATA_ALIGNMENT nvptx_data_alignment

/* Same value as in elf.h and other places.  Without this definition
   BIGGEST_ALIGNMENT would be used instead, and a number of testcases
   would fail.  */
#define MAX_OFILE_ALIGNMENT (32768 * 8)
73 
/* Type Layout.  */

/* Plain char is signed.  */
#define DEFAULT_SIGNED_CHAR 1

/* Widths of the basic C types, in bits.  Only long depends on the ABI:
   64 bits when TARGET_ABI64 is true, 32 otherwise.  */
#define SHORT_TYPE_SIZE 16
#define INT_TYPE_SIZE 32
#define LONG_TYPE_SIZE (TARGET_ABI64 ? 64 : 32)
#define LONG_LONG_TYPE_SIZE 64
#define FLOAT_TYPE_SIZE 32
#define DOUBLE_TYPE_SIZE 64
#define LONG_DOUBLE_TYPE_SIZE 64
#define TARGET_SUPPORTS_WIDE_INT 1

/* size_t and ptrdiff_t follow the ABI selection as well.  Any earlier
   definition is discarded first.  */
#undef SIZE_TYPE
#define SIZE_TYPE (TARGET_ABI64 ? "long unsigned int" : "unsigned int")
#undef PTRDIFF_TYPE
#define PTRDIFF_TYPE (TARGET_ABI64 ? "long int" : "int")

/* Pointer width in bits and the matching machine mode; the stack-size
   mode is the same as the pointer mode.  */
#define POINTER_SIZE (TARGET_ABI64 ? 64 : 32)
#define Pmode (TARGET_ABI64 ? DImode : SImode)
#define STACK_SIZE_MODE Pmode

/* True when the selected PTX ISA option is at least PTX_ISA_SM35.  */
#define TARGET_SM35 (ptx_isa_option >= PTX_ISA_SM35)
97 
/* Registers.  ptx is a virtual target, so only a handful of hard
   registers are defined, each for a special purpose, and pseudos are
   left unallocated.  Some hard registers must exist to keep the gcc
   setup happy.  */
#define FIRST_PSEUDO_REGISTER 16

/* Registers 0-6 are fixed; registers 7-15 are not.  */
#define FIXED_REGISTERS	    { 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }

/* Every hard register is call-used.  */
#define CALL_USED_REGISTERS { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }

/* Register Classes.  Only the empty class and the class holding all
   sixteen hard registers exist.  */
enum reg_class
{
  NO_REGS,
  ALL_REGS,
  LIM_REG_CLASSES
};

#define REG_CLASS_NAMES    { "NO_REGS", "ALL_REGS" }
#define REG_CLASS_CONTENTS { { 0x0000 }, { 0xFFFF } }
#define N_REG_CLASSES (int) LIM_REG_CLASSES

/* Every register number maps to ALL_REGS; the argument is evaluated and
   discarded.  */
#define GENERAL_REGS ALL_REGS
#define REGNO_REG_CLASS(R) ((void)(R), ALL_REGS)

/* Any register may serve as a base; none may serve as an index.  */
#define BASE_REG_CLASS ALL_REGS
#define INDEX_REG_CLASS NO_REGS
#define REGNO_OK_FOR_BASE_P(X) true
#define REGNO_OK_FOR_INDEX_P(X) false

/* Number of words needed to hold MODE, rounded up.  The class argument
   is not used.  */
#define CLASS_MAX_NREGS(class, mode) \
  ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
122 
/* Promote QImode and HImode to SImode; any other MODE is left untouched.
   UNSIGNEDP and TYPE are only cast to void, inside the promoting branch.

   The body is wrapped in do ... while (0) so that the expansion is a
   single statement: a bare `if' would capture a following `else' (or
   fail to parse) when the macro is itself the body of an if/else.  */
#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE)		\
  do							\
    {							\
      if ((MODE) == QImode || (MODE) == HImode)		\
	{						\
	  (MODE) = SImode;				\
	  (void)(UNSIGNEDP);				\
	  (void)(TYPE);					\
	}						\
    }							\
  while (0)
130 
/* Stack and Calling.  */

#define FRAME_GROWS_DOWNWARD 0
#define STACK_GROWS_DOWNWARD 1

/* Numbers of the special-purpose hard registers.  REGISTER_NAMES below
   lists the corresponding names in the same order.  */
#define NVPTX_RETURN_REGNUM 0
#define STACK_POINTER_REGNUM 1
#define FRAME_POINTER_REGNUM 2
#define ARG_POINTER_REGNUM 3
#define STATIC_CHAIN_REGNUM 4
/* Points to the shared memory location that holds the current warp's
   soft stack pointer (__nvptx_stacks[tid.y]).  */
#define SOFTSTACK_SLOT_REGNUM 5
/* Holds the previous soft stack pointer value: saved in the prologue,
   restored when returning.  */
#define SOFTSTACK_PREV_REGNUM 6

/* Names of the sixteen hard registers, indexed by register number.  */
#define REGISTER_NAMES					\
  {							\
    "%value", "%stack", "%frame", "%args",		\
    "%chain", "%sspslot", "%sspprev", "%hr7",		\
    "%hr8", "%hr9", "%hr10", "%hr11",			\
    "%hr12", "%hr13", "%hr14", "%hr15"			\
  }

/* The first parameter sits at offset zero; FNDECL is evaluated and
   discarded.  */
#define FIRST_PARM_OFFSET(FNDECL) ((void)(FNDECL), 0)
#define PUSH_ARGS_REVERSED 1
#define ACCUMULATE_OUTGOING_ARGS 1

/* Keep the argument pointer out of frame-related things.  */
#define FRAME_POINTER_CFA_OFFSET(FNDECL) ((void)(FNDECL), 0)
161 
#ifdef HOST_WIDE_INT
/* Argument-scanning state used as CUMULATIVE_ARGS.  Only declared when
   HOST_WIDE_INT is defined.  */
struct nvptx_args
{
  /* Function type recorded by INIT_CUMULATIVE_ARGS.  */
  tree fntype;

  /* Number of arguments passed in registers so far.  */
  int count;
};
#endif

#define CUMULATIVE_ARGS struct nvptx_args

/* Start a scan: remember FNTYPE and reset the argument count to zero.
   LIBNAME, FNDECL and N_NAMED_ARGS are ignored.  The expansion has type
   void.  */
#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
  ((CUM).fntype = (FNTYPE), (CUM).count = 0, (void)0)
174 
/* No register number is reported as an argument register.  */
#define FUNCTION_ARG_REGNO_P(r) 0

#define DEFAULT_PCC_STRUCT_RETURN 0

/* Profiling is not available: expanding this reports a fatal error.  */
#define FUNCTION_PROFILER(file, labelno)				\
  fatal_error (input_location,						\
	       "profiling is not yet implemented for this architecture")

#define TRAMPOLINE_SIZE 32
#define TRAMPOLINE_ALIGNMENT 256

/* Reload is never run, so this is not actually used, yet it still has
   to be defined.  Listing an argp->fp elimination also keeps
   expand_builtin_setjmp_receiver from generating invalid insns.  */
#define ELIMINABLE_REGS						\
  {								\
    { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }		\
  }

/* Offset, at the start of a routine, between a register being
   eliminated and its replacement.  It is always zero here; FROM and TO
   are ignored.  */
#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET)	\
  ((OFFSET) = 0)

/* Addressing Modes.  */

#define MAX_REGS_PER_ADDRESS 1

#define LEGITIMATE_PIC_OPERAND_P(X) 1
205 
206 
#ifdef HOST_WIDE_INT
/* Target-specific state kept for the current function.  Only declared
   when HOST_WIDE_INT is defined.  */
struct GTY(()) machine_function
{
  /* Arg list for the current call.  */
  rtx_expr_list *call_args;

  /* Within a CALL_ARGS ... CALL_ARGS_END sequence.  */
  bool doing_call;

  /* This call is variadic.  */
  bool is_varadic;

  /* Current function has a variadic call.  */
  bool has_varadic;

  /* Current function has outgoing static chain.  */
  bool has_chain;

  /* Current function has a soft stack frame.  */
  bool has_softstack;

  /* Current function has an OpenMP SIMD region.  */
  bool has_simtreg;

  /* Number of args of current call.  */
  int num_args;

  /* Return mode of current fn.  (Stored as int because machine_mode is
     not defined yet.)  */
  int return_mode;

  /* Neutering predicates.  */
  rtx axis_predicate[2];

  /* Maximum number of threads on each axis: dim[0] is vector_length,
     dim[1] is num_workers.  */
  int axis_dim[2];
  bool axis_dim_init_p;

  /* Register containing the size of each vector's partition of
     shared memory used to broadcast state.  */
  rtx bcast_partition;

  /* Similar to bcast_partition, except for vector reductions.  */
  rtx red_partition;

  /* Synchronization barrier ID for vectors.  */
  rtx sync_bar;

  /* 'Master lane index' for -muniform-simt.  */
  rtx unisimt_master;

  /* Predicate for -muniform-simt.  */
  rtx unisimt_predicate;

  /* Mask location for -muniform-simt.  */
  rtx unisimt_location;

  /* Maximum size resp. alignment required for per-lane storage in
     OpenMP SIMD regions.  */
  unsigned HOST_WIDE_INT simt_stack_size;
  unsigned HOST_WIDE_INT simt_stack_align;
};
#endif
239 
/* Costs.  */

#define NO_FUNCTION_CSE 1
#define SLOW_BYTE_ACCESS 0
/* A constant branch cost of 6; both arguments are ignored.  */
#define BRANCH_COST(speed_p, predictable_p) 6

/* Assembler Format.  */

/* Function start is emitted by nvptx_declare_function_name.  */
#undef ASM_DECLARE_FUNCTION_NAME
#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL)	\
  nvptx_declare_function_name (FILE, NAME, DECL)

/* Function end is emitted by nvptx_function_end; NAME and DECL are not
   passed on.  */
#undef ASM_DECLARE_FUNCTION_SIZE
#define ASM_DECLARE_FUNCTION_SIZE(STREAM, NAME, DECL)	\
  nvptx_function_end (STREAM)

#define DWARF2_ASM_LINE_DEBUG_INFO 1

/* Markers written around inline assembly; both are "//" comment lines.  */
#undef ASM_APP_ON
#define ASM_APP_ON "\t// #APP \n"
#undef ASM_APP_OFF
#define ASM_APP_OFF "\t// #NO_APP \n"
262 
/* Debug register numbers are the same as GCC register numbers.  The
   argument is parenthesized so that an expression argument keeps its
   value when the expansion is embedded in a larger expression.  */
#define DBX_REGISTER_NUMBER(N) (N)

/* Both section directives are empty strings.  */
#define TEXT_SECTION_ASM_OP ""
#define DATA_SECTION_ASM_OP ""
267 
/* Build an internal label in LABEL: a '$', then PREFIX, then NUM written
   by sprint_ul after conversion to unsigned long.  LABEL is a
   caller-provided character buffer.  */
#undef  ASM_GENERATE_INTERNAL_LABEL
#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM)			\
  do									\
    {									\
      char *__num_pos;							\
      (LABEL)[0] = '$';							\
      /* stpcpy returns the end of the copy, where the number goes.  */ \
      __num_pos = stpcpy (&(LABEL)[1], PREFIX);				\
      sprint_ul (__num_pos, (unsigned long) (NUM));			\
    }									\
  while (0)
278 
/* Emits nothing: both arguments are evaluated and discarded.  */
#define ASM_OUTPUT_ALIGN(FILE, POWER)			\
  do							\
    {							\
      (void) (FILE);					\
      (void) (POWER);					\
    }							\
  while (0)

/* Skipping N units is handled by nvptx_output_skip.  */
#define ASM_OUTPUT_SKIP(FILE, N) nvptx_output_skip (FILE, N)
289 
/* Output of LENGTH characters of STR to FILE is handled by
   nvptx_output_ascii.  The expansion deliberately carries no trailing
   semicolon: the caller supplies it, so the macro can be used as the
   body of an if/else.  */
#undef  ASM_OUTPUT_ASCII
#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH)			\
  nvptx_output_ascii (FILE, STR, LENGTH)
293 
/* Object declarations are emitted by nvptx_declare_object_name.  */
#define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL)			\
  nvptx_declare_object_name (FILE, NAME, DECL)

/* Common and local aligned declarations share one helper.  Note that
   nvptx_output_aligned_decl takes NAME before DECL, the reverse of the
   macro parameter order.  */
#undef  ASM_OUTPUT_ALIGNED_DECL_COMMON
#define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN)	\
  nvptx_output_aligned_decl (FILE, NAME, DECL, SIZE, ALIGN)

#undef  ASM_OUTPUT_ALIGNED_DECL_LOCAL
#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN)	\
  nvptx_output_aligned_decl (FILE, NAME, DECL, SIZE, ALIGN)

/* Both settings follow flag_pic.  */
#define CASE_VECTOR_PC_RELATIVE flag_pic
#define JUMP_TABLES_IN_TEXT_SECTION flag_pic

/* 5 when jump tables are in the text section, otherwise 2.  VEC is not
   used.  */
#define ADDR_VEC_ALIGN(VEC) (JUMP_TABLES_IN_TEXT_SECTION ? 5 : 2)
309 
/* Misc.  */

#define DWARF2_LINENO_DEBUGGING_INFO 1

/* For a zero operand, both clz and ctz are given the bit size of MODE
   as their value; each macro stores that in VALUE and evaluates to 2.  */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE)	\
  ((VALUE) = GET_MODE_BITSIZE (MODE), 2)
#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE)	\
  ((VALUE) = GET_MODE_BITSIZE (MODE), 2)

#define SUPPORTS_WEAK 1
#define NO_DOT_IN_LABEL
#define ASM_COMMENT_START "//"

/* Integer store-flag value is -1; the floating-point one is 1.0 in
   MODE.  */
#define STORE_FLAG_VALUE -1
#define FLOAT_STORE_FLAG_VALUE(MODE) REAL_VALUE_ATOF("1.0", (MODE))

#define CASE_VECTOR_MODE SImode
#define MOVE_MAX 8
#define MOVE_RATIO(SPEED) 4
#define FUNCTION_MODE QImode
#define HAS_INIT_SECTION 1

/* The C++ front end insists on linking against libstdc++, which is not
   built for this target.  Point it at the innocuous libgcc instead.  */
#define LIBSTDCXX "gcc"
335 
336 #endif /* GCC_NVPTX_H */
337