1 /* Target Definitions for NVPTX.
2    Copyright (C) 2014-2022 Free Software Foundation, Inc.
3    Contributed by Bernd Schmidt <bernds@codesourcery.com>
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published
9    by the Free Software Foundation; either version 3, or (at your
10    option) any later version.
11 
12    GCC is distributed in the hope that it will be useful, but WITHOUT
13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15    License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 #ifndef GCC_NVPTX_H
22 #define GCC_NVPTX_H
23 
24 #ifndef NVPTX_OPTS_H
25 #include "config/nvptx/nvptx-opts.h"
26 #endif
27 
28 /* Run-time Target.  */
29 
30 #define STARTFILE_SPEC "%{mmainkernel:crt0.o%s}" /* Names crt0.o only when -mmainkernel is given.  */
31 
32 #define TARGET_CPU_CPP_BUILTINS() nvptx_cpu_cpp_builtins () /* Forwards to a function that this header does not declare.  */
33 
34 /* Avoid the default in ../../gcc.cc, which adds "-pthread", which is not
35    supported for nvptx.  The empty string below replaces that default.  */
36 #define GOMP_SELF_SPECS ""
37 
38 /* Storage Layout.  */
39 
40 #define BITS_BIG_ENDIAN 0 /* Bit, byte and word order are all declared as not big-endian (0).  */
41 #define BYTES_BIG_ENDIAN 0
42 #define WORDS_BIG_ENDIAN 0
43 
44 /* Chosen such that we won't have to deal with multi-word subregs.  */
45 #define UNITS_PER_WORD 8 /* Also the rounding unit used by CLASS_MAX_NREGS.  */
46 
47 /* Alignments in bits.  */
48 #define PARM_BOUNDARY 32
49 #define STACK_BOUNDARY 128
50 #define FUNCTION_BOUNDARY 32
51 #define BIGGEST_ALIGNMENT 128
52 #define STRICT_ALIGNMENT 1 /* A nonzero flag, not a bit count.  */
53 
54 #define MAX_STACK_ALIGNMENT (1024 * 8) /* 8192; presumably 1024 bytes expressed in bits -- TODO confirm.  */
55 
56 #define DATA_ALIGNMENT nvptx_data_alignment /* Resolved by nvptx_data_alignment, which this header does not declare.  */
57 
58 /* Copied from elf.h and other places.  We'd otherwise use
59    BIGGEST_ALIGNMENT and fail a number of testcases.  */
60 #define MAX_OFILE_ALIGNMENT (32768 * 8) /* 262144, versus 128 for BIGGEST_ALIGNMENT.  */
61 
62 /* Type Layout.  TARGET_ABI64, tested below, is not defined in this header.  */
63 
64 #define DEFAULT_SIGNED_CHAR 1
65 
66 #define SHORT_TYPE_SIZE 16
67 #define INT_TYPE_SIZE 32
68 #define LONG_TYPE_SIZE (TARGET_ABI64 ? 64 : 32) /* 64 under TARGET_ABI64, otherwise 32.  */
69 #define LONG_LONG_TYPE_SIZE 64
70 #define FLOAT_TYPE_SIZE 32
71 #define DOUBLE_TYPE_SIZE 64
72 #define LONG_DOUBLE_TYPE_SIZE 64 /* Same value as DOUBLE_TYPE_SIZE.  */
73 #define TARGET_SUPPORTS_WIDE_INT 1
74 
75 #undef SIZE_TYPE
76 #define SIZE_TYPE (TARGET_ABI64 ? "long unsigned int" : "unsigned int") /* A C type name as a string, chosen per ABI.  */
77 #undef PTRDIFF_TYPE
78 #define PTRDIFF_TYPE (TARGET_ABI64 ? "long int" : "int") /* Signed counterpart of the SIZE_TYPE choice.  */
79 
80 #define POINTER_SIZE (TARGET_ABI64 ? 64 : 32) /* Equal to LONG_TYPE_SIZE in both ABIs.  */
81 #define Pmode (TARGET_ABI64 ? DImode : SImode)
82 #define STACK_SIZE_MODE Pmode /* Stack sizes use the same mode as pointers.  */
83 
84 #include "nvptx-gen.h"
85 
86 #define TARGET_PTX_6_0 (ptx_version_option >= PTX_VERSION_6_0) /* Each test below is true when ptx_version_option is at least the named version.  */
87 #define TARGET_PTX_6_3 (ptx_version_option >= PTX_VERSION_6_3)
88 #define TARGET_PTX_7_0 (ptx_version_option >= PTX_VERSION_7_0) /* ptx_version_option and PTX_VERSION_* are not defined in this header.  */
89 
90 /* Registers.  Since ptx is a virtual target, we just define a few
91    hard registers for special purposes and leave pseudos unallocated.
92    We have to have some available hard registers, to keep gcc setup
93    happy.  Of the 16 hard registers, 0-6 are fixed and all are call-used.  */
94 #define FIRST_PSEUDO_REGISTER 16
95 #define FIXED_REGISTERS	    { 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
96 #define CALL_USED_REGISTERS { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
97 
98 /* Register Classes.  Only NO_REGS (empty mask) and ALL_REGS (mask 0xFFFF, 16 bits set) exist.  */
99 enum reg_class             {  NO_REGS,    ALL_REGS,	LIM_REG_CLASSES };
100 #define REG_CLASS_NAMES    { "NO_REGS",  "ALL_REGS" }
101 #define REG_CLASS_CONTENTS { { 0x0000 }, { 0xFFFF } }
102 #define N_REG_CLASSES (int) LIM_REG_CLASSES
103 
104 #define GENERAL_REGS ALL_REGS
105 #define REGNO_REG_CLASS(R) ((void)(R), ALL_REGS) /* R is evaluated and discarded; the result is always ALL_REGS.  */
106 #define BASE_REG_CLASS ALL_REGS
107 #define INDEX_REG_CLASS NO_REGS
108 
109 #define REGNO_OK_FOR_BASE_P(X) true /* X is ignored.  */
110 #define REGNO_OK_FOR_INDEX_P(X) false /* X is ignored; agrees with INDEX_REG_CLASS being NO_REGS.  */
111 
112 #define CLASS_MAX_NREGS(class, mode) \
113   ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) /* Mode size rounded up to whole words; the class argument is unused.  */
114 
115 #define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE)		\
116   if ((MODE) == QImode || (MODE) == HImode)		\
117     {							\
118       (MODE) = SImode;					\
119       (void)(UNSIGNEDP);				\
120       (void)(TYPE);					\
121     } /* Rewrites MODE from QImode or HImode to SImode; UNSIGNEDP and TYPE are only cast to void.  NOTE(review): this expands to a bare if statement, so an else written after a use would attach to it -- confirm every use is a plain statement.  */
122 
123 /* Stack and Calling.  Register numbers 0-6 below are the ones marked fixed in FIXED_REGISTERS.  */
124 
125 #define FRAME_GROWS_DOWNWARD 0
126 #define STACK_GROWS_DOWNWARD 1
127 
128 #define NVPTX_RETURN_REGNUM 0 /* "%value" in REGISTER_NAMES.  */
129 #define STACK_POINTER_REGNUM 1 /* "%stack".  */
130 #define FRAME_POINTER_REGNUM 2 /* "%frame".  */
131 #define ARG_POINTER_REGNUM 3 /* "%args".  */
132 #define STATIC_CHAIN_REGNUM 4 /* "%chain".  */
133 /* This register points to the shared memory location with the current warp's
134    soft stack pointer (__nvptx_stacks[tid.y]).  */
135 #define SOFTSTACK_SLOT_REGNUM 5 /* "%sspslot".  */
136 /* This register is used to save the previous value of the soft stack pointer
137    in the prologue and restore it when returning.  */
138 #define SOFTSTACK_PREV_REGNUM 6 /* "%sspprev".  */
139 
140 #define REGISTER_NAMES							\
141   {									\
142     "%value", "%stack", "%frame", "%args",                              \
143     "%chain", "%sspslot", "%sspprev", "%hr7",                           \
144     "%hr8", "%hr9", "%hr10", "%hr11", "%hr12", "%hr13", "%hr14", "%hr15" \
145   } /* Sixteen names in hard register order: entries 0-6 name the special-purpose registers, 7-15 are "%hr7" through "%hr15".  */
146 
147 #define FIRST_PARM_OFFSET(FNDECL) ((void)(FNDECL), 0) /* Always 0; FNDECL is evaluated and discarded.  */
148 #define PUSH_ARGS_REVERSED 1
149 #define ACCUMULATE_OUTGOING_ARGS 1
150 
151 /* Avoid using the argument pointer for frame-related things.  */
152 #define FRAME_POINTER_CFA_OFFSET(FNDECL) ((void)(FNDECL), 0) /* Always 0, like FIRST_PARM_OFFSET.  */
153 
154 #ifdef HOST_WIDE_INT /* The struct is declared only when HOST_WIDE_INT is already defined.  */
155 struct nvptx_args {
156   tree fntype; /* Set from FNTYPE by INIT_CUMULATIVE_ARGS.  */
157   /* Number of arguments passed in registers so far.  */
158   int count; /* Reset to 0 by INIT_CUMULATIVE_ARGS.  */
159 };
160 #endif
161 
162 #define CUMULATIVE_ARGS struct nvptx_args
163 
164 #define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
165   ((CUM).fntype = (FNTYPE), (CUM).count = 0, (void)0) /* LIBNAME, FNDECL and N_NAMED_ARGS are unused; the expression has type void.  */
166 
167 #define FUNCTION_ARG_REGNO_P(r) 0 /* Constant 0: no register number is reported as an argument register.  */
168 
169 #define DEFAULT_PCC_STRUCT_RETURN 0
170 
171 #define FUNCTION_PROFILER(file, labelno) \
172   fatal_error (input_location, \
173 	       "profiling is not yet implemented for this architecture") /* Both macro arguments are ignored.  */
174 
175 #define TRAMPOLINE_SIZE 32
176 #define TRAMPOLINE_ALIGNMENT 256
177 
178 /* We don't run reload, so this isn't actually used, but it still needs to be
179    defined.  Showing an argp->fp elimination also stops
180    expand_builtin_setjmp_receiver from generating invalid insns.  */
181 #define ELIMINABLE_REGS					\
182   {							\
183     { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}	\
184   }
185 
186 /* Define the offset between two registers, one to be eliminated, and the other
187    its replacement, at the start of a routine.  Here it is always 0; FROM and TO are ignored.  */
188 
189 #define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
190   ((OFFSET) = 0)
191 
192 /* Addressing Modes.  */
193 
194 #define MAX_REGS_PER_ADDRESS 1
195 
196 #define LEGITIMATE_PIC_OPERAND_P(X) 1 /* Constant 1; X is ignored.  */
197 
198 
199 #if defined HOST_WIDE_INT /* HOST_WIDE_INT is needed for the simt_stack_* fields.  */
200 struct GTY(()) machine_function
201 {
202   rtx_expr_list *call_args;  /* Arg list for the current call.  */
203   bool doing_call; /* Within a CALL_ARGS ... CALL_ARGS_END sequence.  */
204   bool is_varadic;  /* This call is variadic.  */
205   bool has_varadic;  /* Current function has a variadic call.  */
206   bool has_chain; /* Current function has outgoing static chain.  */
207   bool has_softstack; /* Current function has a soft stack frame.  */
208   bool has_simtreg; /* Current function has an OpenMP SIMD region.  */
209   int num_args;	/* Number of args of current call.  */
210   int return_mode; /* Return mode of current fn.  Stored as int because
211 		      machine_mode is not defined yet.  */
212   rtx axis_predicate[2]; /* Neutering predicates.  */
213   int axis_dim[2]; /* Maximum number of threads on each axis, axis_dim[0] is
214 		      vector_length, axis_dim[1] is num_workers.  */
215   bool axis_dim_init_p; /* Presumably set once axis_dim has been filled in -- TODO confirm against the users of this field.  */
216   rtx bcast_partition; /* Register containing the size of each
217 			  vector's partition of shared memory used to
218 			  broadcast state.  */
219   rtx red_partition; /* Similar to bcast_partition, except for vector
220 			reductions.  */
221   rtx sync_bar; /* Synchronization barrier ID for vectors.  */
222   rtx unisimt_master; /* 'Master lane index' for -muniform-simt.  */
223   rtx unisimt_predicate; /* Predicate for -muniform-simt.  */
224   rtx unisimt_outside_simt_predicate; /* Predicate for -muniform-simt.  */
225   rtx unisimt_location; /* Mask location for -muniform-simt.  */
226   /* The following two fields hold the maximum size and the maximum alignment,
227      respectively, required for per-lane storage in OpenMP SIMD regions.  */
228   unsigned HOST_WIDE_INT simt_stack_size;
229   unsigned HOST_WIDE_INT simt_stack_align;
230 };
231 #endif
232 
233 /* Costs.  */
234 
235 #define NO_FUNCTION_CSE 1
236 #define SLOW_BYTE_ACCESS 0
237 #define BRANCH_COST(speed_p, predictable_p) 6 /* Constant 6; both arguments are ignored.  */
238 
239 /* Assembler Format.  The nvptx_* functions named below are not declared in this header.  */
240 
241 #undef ASM_DECLARE_FUNCTION_NAME
242 #define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL)		\
243   nvptx_declare_function_name (FILE, NAME, DECL)
244 
245 #undef ASM_DECLARE_FUNCTION_SIZE
246 #define ASM_DECLARE_FUNCTION_SIZE(STREAM, NAME, DECL) \
247   nvptx_function_end (STREAM) /* Only STREAM is forwarded; NAME and DECL are dropped.  */
248 
249 #define DWARF2_ASM_LINE_DEBUG_INFO 1
250 
251 #undef ASM_APP_ON
252 #define ASM_APP_ON "\t// #APP \n" /* Both markers start with "//", the same text as ASM_COMMENT_START.  */
253 #undef ASM_APP_OFF
254 #define ASM_APP_OFF "\t// #NO_APP \n"
255 
256 #define DBX_REGISTER_NUMBER(N) N /* Identity mapping.  */
257 
258 #define TEXT_SECTION_ASM_OP "" /* Both section directives are empty strings.  */
259 #define DATA_SECTION_ASM_OP ""
260 
261 #undef  ASM_GENERATE_INTERNAL_LABEL /* Builds LABEL as '$' followed by PREFIX followed by the text sprint_ul produces for NUM.  */
262 #define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM)		\
263   do								\
264     {								\
265       char *__p;						\
266       __p = stpcpy (&(LABEL)[1], PREFIX);			\
267       (LABEL)[0] = '$';						\
268       sprint_ul (__p, (unsigned long) (NUM));			\
269     }								\
270   while (0) /* PREFIX is copied to LABEL + 1 first; stpcpy's return value marks where the number goes.  sprint_ul is not defined here -- presumably it writes NUM in decimal with a terminating NUL; TODO confirm.  */
271 
272 #define ASM_OUTPUT_ALIGN(FILE, POWER)		\
273   do						\
274     {						\
275       (void) (FILE);				\
276       (void) (POWER);				\
277     }						\
278   while (0) /* Writes nothing: both arguments are evaluated and discarded.  */
279 
280 #define ASM_OUTPUT_SKIP(FILE, N)		\
281   nvptx_output_skip (FILE, N) /* Forwards to nvptx_output_skip, which this header does not declare.  */
282 
283 #undef  ASM_OUTPUT_ASCII
284 #define ASM_OUTPUT_ASCII(FILE, STR, LENGTH)			\
285   nvptx_output_ascii (FILE, STR, LENGTH);
286 
287 #define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL)	\
288   nvptx_declare_object_name (FILE, NAME, DECL) /* Forwards all three arguments unchanged.  */
289 
290 #undef  ASM_OUTPUT_ALIGNED_DECL_COMMON
291 #define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN)	\
292   nvptx_output_aligned_decl (FILE, NAME, DECL, SIZE, ALIGN) /* Note the swap: the macro takes DECL before NAME, the callee is passed NAME before DECL.  */
293 
294 #undef  ASM_OUTPUT_ALIGNED_DECL_LOCAL
295 #define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN)	\
296   nvptx_output_aligned_decl (FILE, NAME, DECL, SIZE, ALIGN) /* Same expansion as ASM_OUTPUT_ALIGNED_DECL_COMMON.  */
297 
298 #define CASE_VECTOR_PC_RELATIVE flag_pic
299 #define JUMP_TABLES_IN_TEXT_SECTION flag_pic
300 
301 #define ADDR_VEC_ALIGN(VEC) (JUMP_TABLES_IN_TEXT_SECTION ? 5 : 2) /* 5 when flag_pic is nonzero, otherwise 2; VEC is ignored.  */
302 
303 /* Misc.  */
304 
305 #define DWARF2_LINENO_DEBUGGING_INFO 1
306 
307 #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
308   ((VALUE) = GET_MODE_BITSIZE ((MODE)), 2) /* Stores the bit size of MODE in VALUE and evaluates to 2.  */
309 #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
310   ((VALUE) = GET_MODE_BITSIZE ((MODE)), 2) /* Same expansion as the CLZ variant.  */
311 
312 #define SUPPORTS_WEAK 1
313 
314 /* The documentation states that ASM_OUTPUT_DEF_FROM_DECLS is used in
315    preference to ASM_OUTPUT_DEF if the tree nodes are available.  However, we
316    need the tree nodes to emit the prototype, so at this point it's not clear
317    how we can support ASM_OUTPUT_DEF.  Still, we need to define it, or
318    ASM_OUTPUT_DEF_FROM_DECLS is ignored.  For now, assert, and once we run
319    into it possibly improve by somehow emitting the prototype elsewhere, or
320    emitting a reasonable error message.  */
321 #define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2)	\
322   do						\
323     {						\
324       gcc_unreachable ();			\
325     }						\
326   while (0) /* All three arguments are ignored; the body is only gcc_unreachable ().  */
327 #define ASM_OUTPUT_DEF_FROM_DECLS(STREAM, NAME, VALUE)	\
328   nvptx_asm_output_def_from_decls (STREAM, NAME, VALUE) /* Forwards all three arguments unchanged.  */
329 
330 #define NO_DOT_IN_LABEL
331 #define ASM_COMMENT_START "//"
332 
333 #define STORE_FLAG_VALUE 1
334 #define FLOAT_STORE_FLAG_VALUE(MODE) REAL_VALUE_ATOF("1.0", (MODE)) /* The string "1.0" converted for MODE by REAL_VALUE_ATOF.  */
335 
336 #define CASE_VECTOR_MODE SImode
337 #define MOVE_MAX 8 /* Same value as UNITS_PER_WORD.  */
338 #define MOVE_RATIO(SPEED) 4 /* Constant 4; SPEED is ignored.  */
339 #define FUNCTION_MODE QImode
340 #define HAS_INIT_SECTION 1
341 
342 /* The C++ front end insists on linking against libstdc++ -- which we don't
343    build.  Tell it to link against the innocuous libgcc instead.  */
344 #define LIBSTDCXX "gcc"
345 
346 #endif /* GCC_NVPTX_H */
347