1 /* Target Definitions for NVPTX. 2 Copyright (C) 2014-2019 Free Software Foundation, Inc. 3 Contributed by Bernd Schmidt <bernds@codesourcery.com> 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published 9 by the Free Software Foundation; either version 3, or (at your 10 option) any later version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT 13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21 #ifndef GCC_NVPTX_H 22 #define GCC_NVPTX_H 23 24 #ifndef NVPTX_OPTS_H 25 #include "config/nvptx/nvptx-opts.h" 26 #endif 27 28 /* Run-time Target. */ 29 30 #define STARTFILE_SPEC "%{mmainkernel:crt0.o}" 31 32 #define ASM_SPEC "%{misa=*:-m %*}" 33 34 #define TARGET_CPU_CPP_BUILTINS() \ 35 do \ 36 { \ 37 builtin_assert ("machine=nvptx"); \ 38 builtin_assert ("cpu=nvptx"); \ 39 builtin_define ("__nvptx__"); \ 40 if (TARGET_SOFT_STACK) \ 41 builtin_define ("__nvptx_softstack__"); \ 42 if (TARGET_UNIFORM_SIMT) \ 43 builtin_define ("__nvptx_unisimt__"); \ 44 } while (0) 45 46 /* Avoid the default in ../../gcc.c, which adds "-pthread", which is not 47 supported for nvptx. */ 48 #define GOMP_SELF_SPECS "" 49 50 /* Storage Layout. */ 51 52 #define BITS_BIG_ENDIAN 0 53 #define BYTES_BIG_ENDIAN 0 54 #define WORDS_BIG_ENDIAN 0 55 56 /* Chosen such that we won't have to deal with multi-word subregs. */ 57 #define UNITS_PER_WORD 8 58 59 /* Alignments in bits. */ 60 #define PARM_BOUNDARY 32 61 #define STACK_BOUNDARY 128 62 #define FUNCTION_BOUNDARY 32 63 #define BIGGEST_ALIGNMENT 128 64 #define STRICT_ALIGNMENT 1 65 66 #define MAX_STACK_ALIGNMENT (1024 * 8) 67 68 #define DATA_ALIGNMENT nvptx_data_alignment 69 70 /* Copied from elf.h and other places. We'd otherwise use 71 BIGGEST_ALIGNMENT and fail a number of testcases. */ 72 #define MAX_OFILE_ALIGNMENT (32768 * 8) 73 74 /* Type Layout. */ 75 76 #define DEFAULT_SIGNED_CHAR 1 77 78 #define SHORT_TYPE_SIZE 16 79 #define INT_TYPE_SIZE 32 80 #define LONG_TYPE_SIZE (TARGET_ABI64 ? 64 : 32) 81 #define LONG_LONG_TYPE_SIZE 64 82 #define FLOAT_TYPE_SIZE 32 83 #define DOUBLE_TYPE_SIZE 64 84 #define LONG_DOUBLE_TYPE_SIZE 64 85 #define TARGET_SUPPORTS_WIDE_INT 1 86 87 #undef SIZE_TYPE 88 #define SIZE_TYPE (TARGET_ABI64 ? "long unsigned int" : "unsigned int") 89 #undef PTRDIFF_TYPE 90 #define PTRDIFF_TYPE (TARGET_ABI64 ? "long int" : "int") 91 92 #define POINTER_SIZE (TARGET_ABI64 ? 64 : 32) 93 #define Pmode (TARGET_ABI64 ? DImode : SImode) 94 #define STACK_SIZE_MODE Pmode 95 96 #define TARGET_SM35 (ptx_isa_option >= PTX_ISA_SM35) 97 98 /* Registers. Since ptx is a virtual target, we just define a few 99 hard registers for special purposes and leave pseudos unallocated. 100 We have to have some available hard registers, to keep gcc setup 101 happy. */ 102 #define FIRST_PSEUDO_REGISTER 16 103 #define FIXED_REGISTERS { 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 } 104 #define CALL_USED_REGISTERS { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } 105 106 /* Register Classes. */ 107 enum reg_class { NO_REGS, ALL_REGS, LIM_REG_CLASSES }; 108 #define REG_CLASS_NAMES { "NO_REGS", "ALL_REGS" } 109 #define REG_CLASS_CONTENTS { { 0x0000 }, { 0xFFFF } } 110 #define N_REG_CLASSES (int) LIM_REG_CLASSES 111 112 #define GENERAL_REGS ALL_REGS 113 #define REGNO_REG_CLASS(R) ((void)(R), ALL_REGS) 114 #define BASE_REG_CLASS ALL_REGS 115 #define INDEX_REG_CLASS NO_REGS 116 117 #define REGNO_OK_FOR_BASE_P(X) true 118 #define REGNO_OK_FOR_INDEX_P(X) false 119 120 #define CLASS_MAX_NREGS(class, mode) \ 121 ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) 122 123 #define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ 124 if ((MODE) == QImode || (MODE) == HImode) \ 125 { \ 126 (MODE) = SImode; \ 127 (void)(UNSIGNEDP); \ 128 (void)(TYPE); \ 129 } 130 131 /* Stack and Calling. */ 132 133 #define FRAME_GROWS_DOWNWARD 0 134 #define STACK_GROWS_DOWNWARD 1 135 136 #define NVPTX_RETURN_REGNUM 0 137 #define STACK_POINTER_REGNUM 1 138 #define FRAME_POINTER_REGNUM 2 139 #define ARG_POINTER_REGNUM 3 140 #define STATIC_CHAIN_REGNUM 4 141 /* This register points to the shared memory location with the current warp's 142 soft stack pointer (__nvptx_stacks[tid.y]). */ 143 #define SOFTSTACK_SLOT_REGNUM 5 144 /* This register is used to save the previous value of the soft stack pointer 145 in the prologue and restore it when returning. */ 146 #define SOFTSTACK_PREV_REGNUM 6 147 148 #define REGISTER_NAMES \ 149 { \ 150 "%value", "%stack", "%frame", "%args", \ 151 "%chain", "%sspslot", "%sspprev", "%hr7", \ 152 "%hr8", "%hr9", "%hr10", "%hr11", "%hr12", "%hr13", "%hr14", "%hr15" \ 153 } 154 155 #define FIRST_PARM_OFFSET(FNDECL) ((void)(FNDECL), 0) 156 #define PUSH_ARGS_REVERSED 1 157 #define ACCUMULATE_OUTGOING_ARGS 1 158 159 /* Avoid using the argument pointer for frame-related things. */ 160 #define FRAME_POINTER_CFA_OFFSET(FNDECL) ((void)(FNDECL), 0) 161 162 #ifdef HOST_WIDE_INT 163 struct nvptx_args { 164 tree fntype; 165 /* Number of arguments passed in registers so far. */ 166 int count; 167 }; 168 #endif 169 170 #define CUMULATIVE_ARGS struct nvptx_args 171 172 #define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \ 173 ((CUM).fntype = (FNTYPE), (CUM).count = 0, (void)0) 174 175 #define FUNCTION_ARG_REGNO_P(r) 0 176 177 #define DEFAULT_PCC_STRUCT_RETURN 0 178 179 #define FUNCTION_PROFILER(file, labelno) \ 180 fatal_error (input_location, \ 181 "profiling is not yet implemented for this architecture") 182 183 #define TRAMPOLINE_SIZE 32 184 #define TRAMPOLINE_ALIGNMENT 256 185 186 /* We don't run reload, so this isn't actually used, but it still needs to be 187 defined. Showing an argp->fp elimination also stops 188 expand_builtin_setjmp_receiver from generating invalid insns. */ 189 #define ELIMINABLE_REGS \ 190 { \ 191 { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM} \ 192 } 193 194 /* Define the offset between two registers, one to be eliminated, and the other 195 its replacement, at the start of a routine. */ 196 197 #define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ 198 ((OFFSET) = 0) 199 200 /* Addressing Modes. */ 201 202 #define MAX_REGS_PER_ADDRESS 1 203 204 #define LEGITIMATE_PIC_OPERAND_P(X) 1 205 206 207 #if defined HOST_WIDE_INT 208 struct GTY(()) machine_function 209 { 210 rtx_expr_list *call_args; /* Arg list for the current call. */ 211 bool doing_call; /* Within a CALL_ARGS ... CALL_ARGS_END sequence. */ 212 bool is_varadic; /* This call is varadic */ 213 bool has_varadic; /* Current function has a varadic call. */ 214 bool has_chain; /* Current function has outgoing static chain. */ 215 bool has_softstack; /* Current function has a soft stack frame. */ 216 bool has_simtreg; /* Current function has an OpenMP SIMD region. */ 217 int num_args; /* Number of args of current call. */ 218 int return_mode; /* Return mode of current fn. 219 (machine_mode not defined yet.) */ 220 rtx axis_predicate[2]; /* Neutering predicates. */ 221 int axis_dim[2]; /* Maximum number of threads on each axis, dim[0] is 222 vector_length, dim[1] is num_workers. */ 223 bool axis_dim_init_p; 224 rtx bcast_partition; /* Register containing the size of each 225 vector's partition of share-memory used to 226 broadcast state. */ 227 rtx red_partition; /* Similar to bcast_partition, except for vector 228 reductions. */ 229 rtx sync_bar; /* Synchronization barrier ID for vectors. */ 230 rtx unisimt_master; /* 'Master lane index' for -muniform-simt. */ 231 rtx unisimt_predicate; /* Predicate for -muniform-simt. */ 232 rtx unisimt_location; /* Mask location for -muniform-simt. */ 233 /* The following two fields hold the maximum size resp. alignment required 234 for per-lane storage in OpenMP SIMD regions. */ 235 unsigned HOST_WIDE_INT simt_stack_size; 236 unsigned HOST_WIDE_INT simt_stack_align; 237 }; 238 #endif 239 240 /* Costs. */ 241 242 #define NO_FUNCTION_CSE 1 243 #define SLOW_BYTE_ACCESS 0 244 #define BRANCH_COST(speed_p, predictable_p) 6 245 246 /* Assembler Format. */ 247 248 #undef ASM_DECLARE_FUNCTION_NAME 249 #define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ 250 nvptx_declare_function_name (FILE, NAME, DECL) 251 252 #undef ASM_DECLARE_FUNCTION_SIZE 253 #define ASM_DECLARE_FUNCTION_SIZE(STREAM, NAME, DECL) \ 254 nvptx_function_end (STREAM) 255 256 #define DWARF2_ASM_LINE_DEBUG_INFO 1 257 258 #undef ASM_APP_ON 259 #define ASM_APP_ON "\t// #APP \n" 260 #undef ASM_APP_OFF 261 #define ASM_APP_OFF "\t// #NO_APP \n" 262 263 #define DBX_REGISTER_NUMBER(N) N 264 265 #define TEXT_SECTION_ASM_OP "" 266 #define DATA_SECTION_ASM_OP "" 267 268 #undef ASM_GENERATE_INTERNAL_LABEL 269 #define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ 270 do \ 271 { \ 272 char *__p; \ 273 __p = stpcpy (&(LABEL)[1], PREFIX); \ 274 (LABEL)[0] = '$'; \ 275 sprint_ul (__p, (unsigned long) (NUM)); \ 276 } \ 277 while (0) 278 279 #define ASM_OUTPUT_ALIGN(FILE, POWER) \ 280 do \ 281 { \ 282 (void) (FILE); \ 283 (void) (POWER); \ 284 } \ 285 while (0) 286 287 #define ASM_OUTPUT_SKIP(FILE, N) \ 288 nvptx_output_skip (FILE, N) 289 290 #undef ASM_OUTPUT_ASCII 291 #define ASM_OUTPUT_ASCII(FILE, STR, LENGTH) \ 292 nvptx_output_ascii (FILE, STR, LENGTH); 293 294 #define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \ 295 nvptx_declare_object_name (FILE, NAME, DECL) 296 297 #undef ASM_OUTPUT_ALIGNED_DECL_COMMON 298 #define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \ 299 nvptx_output_aligned_decl (FILE, NAME, DECL, SIZE, ALIGN) 300 301 #undef ASM_OUTPUT_ALIGNED_DECL_LOCAL 302 #define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \ 303 nvptx_output_aligned_decl (FILE, NAME, DECL, SIZE, ALIGN) 304 305 #define CASE_VECTOR_PC_RELATIVE flag_pic 306 #define JUMP_TABLES_IN_TEXT_SECTION flag_pic 307 308 #define ADDR_VEC_ALIGN(VEC) (JUMP_TABLES_IN_TEXT_SECTION ? 5 : 2) 309 310 /* Misc. */ 311 312 #define DWARF2_LINENO_DEBUGGING_INFO 1 313 314 #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ 315 ((VALUE) = GET_MODE_BITSIZE ((MODE)), 2) 316 #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ 317 ((VALUE) = GET_MODE_BITSIZE ((MODE)), 2) 318 319 #define SUPPORTS_WEAK 1 320 #define NO_DOT_IN_LABEL 321 #define ASM_COMMENT_START "//" 322 323 #define STORE_FLAG_VALUE -1 324 #define FLOAT_STORE_FLAG_VALUE(MODE) REAL_VALUE_ATOF("1.0", (MODE)) 325 326 #define CASE_VECTOR_MODE SImode 327 #define MOVE_MAX 8 328 #define MOVE_RATIO(SPEED) 4 329 #define FUNCTION_MODE QImode 330 #define HAS_INIT_SECTION 1 331 332 /* The C++ front end insists to link against libstdc++ -- which we don't build. 333 Tell it to instead link against the innocuous libgcc. */ 334 #define LIBSTDCXX "gcc" 335 336 #endif /* GCC_NVPTX_H */ 337