1 /* 2 Copyright (c) 2010-2021, Intel Corporation 3 All rights reserved. 4 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions are 7 met: 8 9 * Redistributions of source code must retain the above copyright 10 notice, this list of conditions and the following disclaimer. 11 12 * Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 * Neither the name of Intel Corporation nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 22 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 25 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /** @file ispc.h 35 @brief Main ispc.header file. Defines Target, Globals and Opt classes. 
*/

#pragma once

#include "ispc_version.h"
#include "target_enums.h"
#include "target_registry.h"

#if ISPC_LLVM_VERSION < OLDEST_SUPPORTED_LLVM || ISPC_LLVM_VERSION > LATEST_SUPPORTED_LLVM
#error "Only LLVM 8.0 - 12.0 and 13.0 development branch are supported"
#endif

// Host-platform detection macros, used throughout the compiler sources.
#if defined(_WIN32) || defined(_WIN64)
#define ISPC_HOST_IS_WINDOWS
#elif defined(__linux__)
#define ISPC_HOST_IS_LINUX
#elif defined(__FreeBSD__)
#define ISPC_HOST_IS_FREEBSD
#elif defined(__DragonFly__)
#define ISPC_HOST_IS_DRAGONFLY
#elif defined(__APPLE__)
#define ISPC_HOST_IS_APPLE
#endif

#include <map>
#include <set>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <vector>

#include <llvm/ADT/StringRef.h>

/** @def ISPC_MAX_NVEC maximum vector size of any of the compilation
    targets.
 */
#define ISPC_MAX_NVEC 64

// Number of final optimization phase
#define LAST_OPT_NUMBER 1000

// Forward declarations of a number of widely-used LLVM types
namespace llvm {

class AttrBuilder;
class BasicBlock;
class Constant;
class ConstantValue;
class DataLayout;
class DIBuilder;
class Function;
class FunctionType;
class LLVMContext;
class Module;
class Target;
class TargetMachine;
class Type;
class Value;
class DIFile;
class DINamespace;
class DIType;

class DIScope;
} // namespace llvm

namespace ispc {

// Forward declarations of the main ispc compiler types, so that this
// header does not need to pull in their full definitions.
class ArrayType;
class AST;
class ASTNode;
class AtomicType;
class FunctionEmitContext;
class Expr;
class ExprList;
class Function;
class FunctionType;
class Module;
class PointerType;
class Stmt;
class Symbol;
class SymbolTable;
class Type;
struct VariableDeclaration;

/** Storage class of a declaration (none/extern/static/typedef/extern "C"). */
enum StorageClass { SC_NONE, SC_EXTERN, SC_STATIC, SC_TYPEDEF, SC_EXTERN_C };

/** @brief Representation of a range of positions in a source file.

    This class represents a range of characters in a source file
    (e.g. those that span a token's definition), from starting line and
    column to ending line and column.  (These values are tracked by the
    lexing code.)  Both lines and columns are counted starting from one.
 */
struct SourcePos {
    SourcePos(const char *n = NULL, int fl = 0, int fc = 0, int ll = 0, int lc = 0);

    const char *name;
    int first_line;
    int first_column;
    int last_line;
    int last_column;

    /** Prints the filename and line/column range to standard output. */
    void Print() const;

    /** Returns a LLVM DIFile object that represents the SourcePos's file */
    llvm::DIFile *GetDIFile() const;

    /** Returns a LLVM DINamespace object that represents 'ispc' namespace. */
    llvm::DINamespace *GetDINamespace() const;

    bool operator==(const SourcePos &p2) const;
};

/** Returns a SourcePos that encompasses the extent of both of the given
    extents. */
SourcePos Union(const SourcePos &p1, const SourcePos &p2);

/** @brief Structure that defines a compilation target

    This structure defines a compilation target for the ispc compiler.
 */
class Target {
  public:
    /** Enumerator giving the instruction sets that the compiler can
        target.  These should be ordered from "worse" to "better" in that
        if a processor supports multiple target ISAs, then the most
        flexible/performant of them will appear last in the enumerant.
        Note also that __best_available_isa() needs to be updated if ISAs
        are added or the enumerant values are reordered. */
    enum ISA {
        SSE2 = 0,
        SSE4 = 1,
        AVX = 2,
        // Not supported anymore. Use either AVX or AVX2.
        // AVX11 = 3,
        AVX2 = 3,
        KNL_AVX512 = 4,
        SKX_AVX512 = 5,
#ifdef ISPC_ARM_ENABLED
        NEON,
#endif
#ifdef ISPC_WASM_ENABLED
        WASM,
#endif
#ifdef ISPC_GENX_ENABLED
        GENX,
#endif
        NUM_ISAS
    };

#ifdef ISPC_GENX_ENABLED
    /** Supported GenX (Intel GPU) hardware platforms. */
    enum GENX_PLATFORM {
        GENX_GEN9,
        GENX_TGLLP,
    };
#endif

    /** Initializes the given Target pointer for a target of the given
        name, if the name is a known target.  Returns true if the
        target was initialized and false if the name is unknown. */
    Target(Arch arch, const char *cpu, ISPCTarget isa, bool pic, bool printTarget);

    /** Check if LLVM intrinsic is supported for the current target.
        (NOTE: the "Intrinstic" spelling is kept as-is, since callers
        elsewhere depend on this name.) */
    bool checkIntrinsticSupport(llvm::StringRef name, SourcePos pos);

    /** Returns a comma-delimited string giving the names of the currently
        supported CPUs. */
    static std::string SupportedCPUs();

    /** Returns a triple string specifying the target architecture, vendor,
        and environment. */
    std::string GetTripleString() const;

    /** Returns the LLVM TargetMachine object corresponding to this
        target. */
    llvm::TargetMachine *GetTargetMachine() const { return m_targetMachine; }

    /** Convert ISA enum to string */
    static const char *ISAToString(Target::ISA isa);

    /** Returns a string like "avx" encoding the target. Good for mangling. */
    const char *GetISAString() const;

    /** Convert ISA enum to string */
    static const char *ISAToTargetString(Target::ISA isa);

    /** Returns a string like "avx2-i32x8" encoding the target.
        This may be used for Target initialization. */
    const char *GetISATargetString() const;

    /** Returns the size of the given type */
    llvm::Value *SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd);

    /** Given a structure type and an element number in the structure,
        returns a value corresponding to the number of bytes from the start
        of the structure where the element is located. */
    llvm::Value *StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd);

    /** Mark LLVM function with target specific attribute, if required. */
    void markFuncWithTargetAttr(llvm::Function *func);

    /** Set LLVM function with Calling Convention. */
    void markFuncWithCallingConv(llvm::Function *func);

    const llvm::Target *getTarget() const { return m_target; }

    // Note the same name of method for 3.1 and 3.2+, this allows
    // to reduce number ifdefs on client side.
    const llvm::DataLayout *getDataLayout() const { return m_dataLayout; }

    /** Reports if Target object has valid state. */
    bool isValid() const { return m_valid; }

    ISPCTarget getISPCTarget() const { return m_ispc_target; }

    ISA getISA() const { return m_isa; }

    /** Returns true if the current target is a GenX (Intel GPU) target;
        always false in builds without GenX support. */
    bool isGenXTarget() {
#ifdef ISPC_GENX_ENABLED
        return m_isa == Target::GENX;
#else
        return false;
#endif
    }

#ifdef ISPC_GENX_ENABLED
    GENX_PLATFORM getGenxPlatform() const;
    uint32_t getGenxGrfSize() const;
    bool hasGenxPrefetch() const;
#endif

    Arch getArch() const { return m_arch; }

    bool is32Bit() const { return m_is32Bit; }

    std::string getCPU() const { return m_cpu; }

    int getNativeVectorWidth() const { return m_nativeVectorWidth; }

    int getNativeVectorAlignment() const { return m_nativeVectorAlignment; }

    int getDataTypeWidth() const { return m_dataTypeWidth; }

    int getVectorWidth() const { return m_vectorWidth; }

    bool getGeneratePIC() const { return m_generatePIC; }

    bool getMaskingIsFree() const { return m_maskingIsFree; }

    int getMaskBitCount() const { return m_maskBitCount; }

    bool hasHalf() const { return m_hasHalf; }

    bool hasRand() const { return m_hasRand; }

    bool hasGather() const { return m_hasGather; }

    bool hasScatter() const { return m_hasScatter; }

    bool hasTranscendentals() const { return m_hasTranscendentals; }

    bool hasTrigonometry() const { return m_hasTrigonometry; }

    bool hasRsqrtd() const { return m_hasRsqrtd; }

    bool hasRcpd() const { return m_hasRcpd; }

    bool hasVecPrefetch() const { return m_hasVecPrefetch; }

    bool hasSatArith() const { return m_hasSaturatingArithmetic; }

    bool hasFp64Support() const { return m_hasFp64Support; }

    bool warnFtoU32IsExpensive() const { return m_warnFtoU32IsExpensive; }

  private:
    /** llvm Target object representing this target. */
    const llvm::Target *m_target;

    /** llvm TargetMachine.
        Note that it's not destroyed during Target destruction, as
        Module::CompileAndOutput() uses TargetMachines after Target is
        destroyed.  This needs to be changed. */
    llvm::TargetMachine *m_targetMachine;
    llvm::DataLayout *m_dataLayout;

    /** flag to report invalid state after construction
        (due to bad parameters passed to constructor). */
    bool m_valid;

    /** ISPC target being used */
    ISPCTarget m_ispc_target;

    /** Instruction set being compiled to. */
    ISA m_isa;

    /** Target system architecture.  (e.g. "x86-64", "x86"). */
    Arch m_arch;

    /** Is the target architecture 32 or 64 bit */
    bool m_is32Bit;

    /** Target CPU. (e.g. "corei7", "corei7-avx", ..) */
    std::string m_cpu;

    /** Target-specific attribute string to pass along to the LLVM backend */
    std::string m_attributes;

    /** Target-specific function attributes */
    std::vector<std::pair<std::string, std::string>> m_funcAttributes;

    /** Target-specific LLVM attribute, which has to be attached to every
        function to ensure that it is generated for correct target
        architecture.  This requirement was introduced in LLVM 3.3 */
    llvm::AttrBuilder *m_tf_attributes;

    /** Native vector width of the vector instruction set.  Note that this
        value is directly derived from the ISA being used (e.g. it's 4 for
        SSE, 8 for AVX, etc.) */
    int m_nativeVectorWidth;

    /** Native vector alignment in bytes.  Theoretically this may be
        derived from the vector size, but it's better to manage directly
        the alignment.  It allows easier experimenting and better fine
        tuning for particular platform.  This information is primarily
        used when --opt=force-aligned-memory is used. */
    int m_nativeVectorAlignment;

    /** Data type width in bits.  Typically it's 32, but could be 8, 16 or 64. */
    int m_dataTypeWidth;

    /** Actual vector width currently being compiled to.  This may be an
        integer multiple of the native vector width, for example if we're
        "doubling up" and compiling 8-wide on a 4-wide SSE system. */
    int m_vectorWidth;

    /** Indicates whether position independent code should be generated. */
    bool m_generatePIC;

    /** Is there overhead associated with masking on the target
        architecture; e.g. there is on SSE, due to extra blends and the
        like, but there isn't with an ISA that supports masking
        natively. */
    bool m_maskingIsFree;

    /** How many bits are used to store each element of the mask: e.g. this
        is 32 on SSE/AVX, since that matches the HW better. */
    int m_maskBitCount;

    /** Indicates whether the target has native support for float/half
        conversions. */
    bool m_hasHalf;

    /** Indicates whether there is an ISA random number instruction. */
    bool m_hasRand;

    /** Indicates whether the target has a native gather instruction */
    bool m_hasGather;

    /** Indicates whether the target has a native scatter instruction */
    bool m_hasScatter;

    /** Indicates whether the target has support for transcendentals
        (beyond sqrt, which we assume that all of them handle). */
    bool m_hasTranscendentals;

    /** Indicates whether the target has ISA support for trigonometry */
    bool m_hasTrigonometry;

    /** Indicates whether there is an ISA double precision rsqrt. */
    bool m_hasRsqrtd;

    /** Indicates whether there is an ISA double precision rcp. */
    bool m_hasRcpd;

    /** Indicates whether the target has hardware instruction for vector
        prefetch. */
    bool m_hasVecPrefetch;

    /** Indicates whether the target has special saturating arithmetic
        instructions. */
    bool m_hasSaturatingArithmetic;

    /** Indicates whether the target has FP64 support. */
    bool m_hasFp64Support;

    /** When true, a performance warning should be issued for float ->
        uint32 conversions on this target (see warnFtoU32IsExpensive()).
        NOTE(review): the original comment said "uint32 -> float cvt
        support", which contradicts the member name; confirm the exact
        direction against the target setup code. */
    bool m_warnFtoU32IsExpensive;
};

/** @brief Structure that collects optimization options

    This structure collects all of the options related to optimization of
    generated code.
 */
struct Opt {
    Opt();

    /** Optimization level.  Currently, the only valid values are 0,
        indicating essentially no optimization, and 1, indicating as much
        optimization as possible. */
    int level;

    /** Indicates whether "fast and loose" numerically unsafe
        optimizations should be performed.  This is false by default. */
    bool fastMath;

    /** Indicates whether a vector load should be issued for masked loads
        on platforms that don't have a native masked vector load.  (This
        may lead to accessing memory up to programCount-1 elements past
        the end of arrays, so is unsafe in general.) */
    bool fastMaskedVload;

    /** Indicates when loops should be unrolled (when doing so seems like
        it will make sense.) */
    bool unrollLoops;

    /** Indicates if addressing math will be done with 32-bit math, even
        on 64-bit systems.  (This is generally noticeably more efficient,
        though at the cost of addressing >2GB). */
    bool force32BitAddressing;

    /** Indicates whether Assert() statements should be ignored (for
        performance in the generated code). */
    bool disableAsserts;

    /** Indicates whether FMA instructions should be disabled (on targets
        that support them). */
    bool disableFMA;

    /** Always generate aligned vector load/store instructions; this
        implies a guarantee that all dynamic access through pointers that
        becomes a vector load/store will be a cache-aligned sequence of
        locations. */
    bool forceAlignedMemory;

    /** If enabled, disables the various optimizations that kick in when
        the execution mask can be determined to be "all on" at compile
        time. */
    bool disableMaskAllOnOptimizations;

    /** If enabled, the various __pseudo* memory ops (gather/scatter,
        masked load/store) are left in their __pseudo* form, for better
        understanding of the structure of generated code when reading
        it. */
    bool disableHandlePseudoMemoryOps;

    /** On targets that don't have a masked store instruction but do have
        a blending instruction, by default, we simulate masked stores by
        loading the old value, blending, and storing the result.  This can
        potentially be unsafe in multi-threaded code, in that it writes to
        locations that aren't supposed to be written to.  Setting this
        value to true disables this work-around, and instead implements
        masked stores by 'scalarizing' them, so that we iterate over the
        ISIMD lanes and do a scalar write for the ones that are
        running. */
    bool disableBlendedMaskedStores;

    /** Disables the 'coherent control flow' constructs in the
        language.  (e.g. this causes "cif" statements to be demoted to
        "if" statements.)  This is likely only useful for measuring the
        impact of coherent control flow. */
    bool disableCoherentControlFlow;

    /** Disables uniform control flow optimizations (e.g. this changes an
        "if" statement with a uniform condition to have a varying
        condition).  This is likely only useful for measuring the impact
        of uniform control flow. */
    bool disableUniformControlFlow;

    /** Disables the backend optimizations related to gather/scatter
        (e.g. transforming gather from sequential locations to an
        unaligned load, etc.)  This is likely only useful for measuring
        the impact of these optimizations. */
    bool disableGatherScatterOptimizations;

    /** Disables the optimization that demotes masked stores to regular
        stores when the store is happening at the same control flow level
        where the variable was declared.  This is likely only useful for
        measuring the impact of this optimization. */
    bool disableMaskedStoreToStore;

    /** Disables the optimization that detects when the execution mask is
        all on and emits code for gathers and scatters that doesn't loop
        over the SIMD lanes but just does the scalar loads and stores
        directly. */
    bool disableGatherScatterFlattening;

    /** Disables the optimizations that detect when arrays are being
        indexed with 'uniform' values and issue scalar loads/stores rather
        than gathers/scatters.  This is likely only useful for measuring
        the impact of this optimization. */
    bool disableUniformMemoryOptimizations;

    /** Disables optimizations that coalesce incoherent scalar memory
        access from gathers into wider vector operations, when
        possible. */
    bool disableCoalescing;

    /** Disable using zmm registers for avx512 target in favour of ymm.
        Affects only >= 512 bit wide targets and only if avx512vl is
        available */
    bool disableZMM;

#ifdef ISPC_GENX_ENABLED
    /** Disables optimization that coalesce gathers on GenX.  This is
        likely only useful for measuring the impact of this
        optimization */
    bool disableGenXGatherCoalescing;

    /** Enables experimental support of foreach statement inside varying
        CF.  Current implementation brings performance degradation due to
        ineffective implementation of unmasked. */
    bool enableForeachInsideVarying;

    /** Enables emitting of genx.any intrinsics and the control flow which
        is based on implicit hardware mask.  Forces generation of
        goto/join instructions in assembly. */
    bool emitGenXHardwareMask;

    /** Enables generation of masked loads implemented using svm loads
        which may lead to out of bound reads but bring performance
        improvement in most of the cases. */
    bool enableGenXUnsafeMaskedLoad;
#endif
};

/** @brief This structure collects together a number of global variables.

    This structure collects a number of global variables that mostly
    represent parameter settings for this compilation run.  In particular,
    none of these values should change after compilation begins; their
    values are all set during command-line argument processing or very
    early during the compiler's execution, before any files are parsed.
 */
struct Globals {
    Globals();

    /** TargetRegistry holding all stdlib bitcode. */
    TargetLibRegistry *target_registry;

    /** Optimization option settings */
    Opt opt;

    /** Compilation target information */
    Target *target;

    /** Target OS */
    TargetOS target_os;

    /** Function Calling Convention */
    CallingConv calling_conv;

    /** There are a number of math libraries that can be used for
        transcendentals and the like during program compilation. */
    enum MathLib { Math_ISPC, Math_ISPCFast, Math_SVML, Math_System };
    MathLib mathLib;

    /** Optimization level to be specified while creating TargetMachine. */
    enum CodegenOptLevel { None, Aggressive };
    CodegenOptLevel codegenOptLevel;

    /** Records whether the ispc standard library should be made available
        to the program during compilations.  (Default is true.) */
    bool includeStdlib;

    /** Indicates whether the C pre-processor should be run over the
        program source before compiling it.  (Default is true.) */
    bool runCPP;

    /** When \c true, voluminous debugging output will be printed during
        ispc's execution. */
    bool debugPrint;

    /** When \c true, target ISA will be printed during ispc's
        execution. */
    bool printTarget;

    /** When \c true, LLVM won't omit frame pointer. */
    bool NoOmitFramePointer;

    /** Indicates which stages of optimization we want to dump. */
    std::set<int> debug_stages;

    /** Whether to dump IR to file. */
    bool dumpFile;

    /** Indicates after which optimization we want to generate
        DebugIR information. */
    int debugIR;

    /** Indicates which phases of optimization we want to switch off. */
    std::set<int> off_stages;

    /** Indicates whether all warning messages should be suppressed. */
    bool disableWarnings;

    /** Indicates whether warnings should be issued as errors. */
    bool warningsAsErrors;

    /** Indicates whether line wrapping of error messages to the terminal
        width should be disabled. */
    bool disableLineWrap;

    /** Indicates whether additional warnings should be issued about
        possible performance pitfalls. */
    bool emitPerfWarnings;

    /** Indicates whether all printed output should be suppressed. */
    bool quiet;

    /** Always use ANSI escape sequences to colorize warning and error
        messages, even if piping output to a file, etc. */
    bool forceColoredOutput;

    /** Indicates whether calls should be emitted in the program to an
        externally-defined program instrumentation function.  (See the
        "Instrumenting your ispc programs" section in the user's
        manual.) */
    bool emitInstrumentation;

#ifdef ISPC_GENX_ENABLED
    /** Arguments to pass to Vector Compiler backend for offline
        compilation to L0 binary */
    std::string vcOpts;
#endif

    // NOTE(review): undocumented flag — presumably controls whether
    // "#pragma once" is honored/emitted; confirm against the
    // preprocessor-handling code before relying on this description.
    bool noPragmaOnce;

    /** Indicates whether ispc should generate debugging symbols for the
        program in its output. */
    bool generateDebuggingSymbols;

    /** Require generation of DWARF of certain version (2, 3, 4).  For
        default version, this field is set to 0. */
    // Hint: to verify dwarf version in the object file, run on Linux:
    // readelf --debug-dump=info object.o | grep -A 2 'Compilation Unit @'
    // on Mac:
    // xcrun dwarfdump -r0 object.o
    int generateDWARFVersion;

    /** If true, function names are mangled by appending the target ISA
        and vector width to them. */
    bool mangleFunctionsWithTarget;

    /** If enabled, the lexer will randomly replace some tokens returned
        with other tokens, in order to test error condition handling in
        the compiler. */
    bool enableFuzzTest;

    /* If enabled, allows the user to directly call LLVM intrinsics. */
    bool enableLLVMIntrinsics;

    /** Seed for random number generator used for fuzz testing. */
    int fuzzTestSeed;

    /** Global LLVMContext object */
    llvm::LLVMContext *ctx;

    /** Current working directory when the ispc compiler starts
        execution. */
    char currentDirectory[1024];

    /** Arguments to pass along to the C pre-processor, if it is run on
        the program before compilation. */
    std::vector<std::string> cppArgs;

    /** Additional user-provided directories to search when processing
        #include directives in the preprocessor. */
    std::vector<std::string> includePath;

    /** Indicates that alignment in memory allocation routines should be
        forced to have given value.  -1 value means natural alignment for
        the platforms. */
    int forceAlignment;

    /** When true, flag non-static functions with dllexport attribute on
        Windows. */
    bool dllExport;

    /** Lines for which warnings are turned off. */
    std::map<std::pair<int, std::string>, bool> turnOffWarnings;

    // NOTE(review): undocumented enum — names suggest the unroll modes
    // selectable via "#pragma unroll"/"#pragma nounroll"; confirm against
    // the pragma-handling code.
    enum pragmaUnrollType { none, nounroll, unroll, count };

    /* If true, we are compiling for more than one target. */
    bool isMultiTargetCompilation;

    /* Number of errors to show in ISPC. */
    int errorLimit;

    /* When true, enable compile time tracing. */
    bool enableTimeTrace;

    /* When compile time tracing is enabled, set time granularity. */
    int timeTraceGranularity;
};

// Estimated relative costs of various operations, used by the compiler's
// heuristics (e.g. deciding whether predication is cheaper than a branch).
enum {
    COST_ASSIGN = 1,
    COST_COMPLEX_ARITH_OP = 4,
    COST_DELETE = 32,
    COST_DEREF = 4,
    COST_FUNCALL = 4,
    COST_FUNPTR_UNIFORM = 12,
    COST_FUNPTR_VARYING = 24,
    COST_GATHER = 8,
    COST_GOTO = 4,
    COST_LOAD = 2,
    COST_NEW = 32,
    COST_BREAK_CONTINUE = 3,
    COST_RETURN = 4,
    COST_SELECT = 4,
    COST_SIMPLE_ARITH_LOGIC_OP = 1,
    COST_SYNC = 32,
    COST_TASK_LAUNCH = 32,
    COST_TYPECAST_COMPLEX = 4,
    COST_TYPECAST_SIMPLE = 1,
    COST_UNIFORM_IF = 2,
    COST_VARYING_IF = 3,
    COST_UNIFORM_LOOP = 4,
    COST_VARYING_LOOP = 6,
    COST_UNIFORM_SWITCH = 4,
    COST_VARYING_SWITCH = 12,
    COST_ASSERT = 8,

    CHECK_MASK_AT_FUNCTION_START_COST = 16,
    PREDICATE_SAFE_IF_STATEMENT_COST = 6,
    // For gen target we want to avoid branches as much as possible
    // so we use increased cost here
    PREDICATE_SAFE_SHORT_CIRC_GENX_STATEMENT_COST = 10,
};

// Global compiler state, defined in the corresponding .cpp files.
extern Globals *g;
extern Module *m;
} // namespace ispc