1 /*
2   Copyright (c) 2010-2021, Intel Corporation
3   All rights reserved.
4 
5   Redistribution and use in source and binary forms, with or without
6   modification, are permitted provided that the following conditions are
7   met:
8 
9     * Redistributions of source code must retain the above copyright
10       notice, this list of conditions and the following disclaimer.
11 
12     * Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16     * Neither the name of Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20 
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
/** @file ispc.h
    @brief Main ispc header file. Defines the Target, Globals and Opt classes.
*/
37 
38 #pragma once
39 
40 #include "ispc_version.h"
41 #include "target_enums.h"
42 #include "target_registry.h"
43 
// Refuse to build against an LLVM release outside the supported window.
// ISPC_LLVM_VERSION and the OLDEST/LATEST bounds are not defined in this
// file; presumably they come from "ispc_version.h" -- TODO confirm.
#if ISPC_LLVM_VERSION < OLDEST_SUPPORTED_LLVM || ISPC_LLVM_VERSION > LATEST_SUPPORTED_LLVM
#error "Only LLVM 8.0 - 12.0 and 13.0 development branch are supported"
#endif
47 
// Identify the operating system this translation unit is being compiled on.
// The checks form a single #if/#elif chain, so at most one ISPC_HOST_IS_*
// macro is defined; on an unrecognized host none of them is defined.
#if defined(_WIN32) || defined(_WIN64)
#define ISPC_HOST_IS_WINDOWS
#elif defined(__linux__)
#define ISPC_HOST_IS_LINUX
#elif defined(__FreeBSD__)
#define ISPC_HOST_IS_FREEBSD
#elif defined(__DragonFly__)
#define ISPC_HOST_IS_DRAGONFLY
#elif defined(__APPLE__)
#define ISPC_HOST_IS_APPLE
#endif
59 
60 #include <map>
61 #include <set>
62 #include <stdint.h>
63 #include <stdio.h>
64 #include <stdlib.h>
65 #include <string>
66 #include <vector>
67 
68 #include <llvm/ADT/StringRef.h>
69 
/** @def ISPC_MAX_NVEC
    Maximum vector size of any of the compilation targets.
 */
#define ISPC_MAX_NVEC 64

/** @def LAST_OPT_NUMBER
    Number assigned to the final optimization phase.
 */
#define LAST_OPT_NUMBER 1000
77 
// Forward declarations of a number of widely-used LLVM types, so that this
// header can name pointers to them without pulling in the LLVM headers.
namespace llvm {

// IR and core types
class AttrBuilder;
class BasicBlock;
class Constant;
class ConstantValue;
class DataLayout;
class Function;
class FunctionType;
class LLVMContext;
class Module;
class Type;
class Value;

// Code generation target types
class Target;
class TargetMachine;

// Debug-info types
class DIBuilder;
class DIFile;
class DINamespace;
class DIScope;
class DIType;

} // namespace llvm
101 
102 namespace ispc {
103 
// Forward declarations of ispc's own types; only their names are needed by
// the declarations in this header.
class ArrayType;
class AST;
class ASTNode;
class AtomicType;
class Expr;
class ExprList;
class Function;
class FunctionEmitContext;
class FunctionType;
class Module;
class PointerType;
class Stmt;
class Symbol;
class SymbolTable;
class Type;
struct VariableDeclaration;
120 
/** Storage-class specifier attached to a declaration.  The enumerators are
    implicitly numbered from zero in the order listed. */
enum StorageClass {
    SC_NONE,
    SC_EXTERN,
    SC_STATIC,
    SC_TYPEDEF,
    SC_EXTERN_C
};
122 
123 /** @brief Representation of a range of positions in a source file.
124 
125     This class represents a range of characters in a source file
126     (e.g. those that span a token's definition), from starting line and
127     column to ending line and column.  (These values are tracked by the
128     lexing code).  Both lines and columns are counted starting from one.
129  */
130 struct SourcePos {
131     SourcePos(const char *n = NULL, int fl = 0, int fc = 0, int ll = 0, int lc = 0);
132 
133     const char *name;
134     int first_line;
135     int first_column;
136     int last_line;
137     int last_column;
138 
139     /** Prints the filename and line/column range to standard output. */
140     void Print() const;
141 
142     /** Returns a LLVM DIFile object that represents the SourcePos's file */
143     llvm::DIFile *GetDIFile() const;
144 
145     /** Returns a LLVM DINamespace object that represents 'ispc' namespace. */
146     llvm::DINamespace *GetDINamespace() const;
147 
148     bool operator==(const SourcePos &p2) const;
149 };
150 
151 /** Returns a SourcePos that encompasses the extent of both of the given
152     extents. */
153 SourcePos Union(const SourcePos &p1, const SourcePos &p2);
154 
155 /** @brief Structure that defines a compilation target
156 
157     This structure defines a compilation target for the ispc compiler.
158 */
159 class Target {
160   public:
161     /** Enumerator giving the instruction sets that the compiler can
162         target.  These should be ordered from "worse" to "better" in that
163         if a processor supports multiple target ISAs, then the most
164         flexible/performant of them will apear last in the enumerant.  Note
165         also that __best_available_isa() needs to be updated if ISAs are
166         added or the enumerant values are reordered.  */
167     enum ISA {
168         SSE2 = 0,
169         SSE4 = 1,
170         AVX = 2,
171         // Not supported anymore. Use either AVX or AVX2.
172         // AVX11 = 3,
173         AVX2 = 3,
174         KNL_AVX512 = 4,
175         SKX_AVX512 = 5,
176 #ifdef ISPC_ARM_ENABLED
177         NEON,
178 #endif
179 #ifdef ISPC_WASM_ENABLED
180         WASM,
181 #endif
182 #ifdef ISPC_GENX_ENABLED
183         GENX,
184 #endif
185         NUM_ISAS
186     };
187 
188 #ifdef ISPC_GENX_ENABLED
189     enum GENX_PLATFORM {
190         GENX_GEN9,
191         GENX_TGLLP,
192     };
193 #endif
194 
195     /** Initializes the given Target pointer for a target of the given
196         name, if the name is a known target.  Returns true if the
197         target was initialized and false if the name is unknown. */
198     Target(Arch arch, const char *cpu, ISPCTarget isa, bool pic, bool printTarget);
199 
200     /** Check if LLVM intrinsic is supported for the current target. */
201     bool checkIntrinsticSupport(llvm::StringRef name, SourcePos pos);
202 
203     /** Returns a comma-delimited string giving the names of the currently
204         supported CPUs. */
205     static std::string SupportedCPUs();
206 
207     /** Returns a triple string specifying the target architecture, vendor,
208         and environment. */
209     std::string GetTripleString() const;
210 
211     /** Returns the LLVM TargetMachine object corresponding to this
212         target. */
GetTargetMachine()213     llvm::TargetMachine *GetTargetMachine() const { return m_targetMachine; }
214 
215     /** Convert ISA enum to string */
216     static const char *ISAToString(Target::ISA isa);
217 
218     /** Returns a string like "avx" encoding the target. Good for mangling. */
219     const char *GetISAString() const;
220 
221     /** Convert ISA enum to string */
222     static const char *ISAToTargetString(Target::ISA isa);
223 
224     /** Returns a string like "avx2-i32x8" encoding the target.
225         This may be used for Target initialization. */
226     const char *GetISATargetString() const;
227 
228     /** Returns the size of the given type */
229     llvm::Value *SizeOf(llvm::Type *type, llvm::BasicBlock *insertAtEnd);
230 
231     /** Given a structure type and an element number in the structure,
232         returns a value corresponding to the number of bytes from the start
233         of the structure where the element is located. */
234     llvm::Value *StructOffset(llvm::Type *type, int element, llvm::BasicBlock *insertAtEnd);
235 
236     /** Mark LLVM function with target specific attribute, if required. */
237     void markFuncWithTargetAttr(llvm::Function *func);
238 
239     /** Set LLVM function with Calling Convention. */
240     void markFuncWithCallingConv(llvm::Function *func);
241 
getTarget()242     const llvm::Target *getTarget() const { return m_target; }
243 
244     // Note the same name of method for 3.1 and 3.2+, this allows
245     // to reduce number ifdefs on client side.
getDataLayout()246     const llvm::DataLayout *getDataLayout() const { return m_dataLayout; }
247 
248     /** Reports if Target object has valid state. */
isValid()249     bool isValid() const { return m_valid; }
250 
getISPCTarget()251     ISPCTarget getISPCTarget() const { return m_ispc_target; }
252 
getISA()253     ISA getISA() const { return m_isa; }
254 
isGenXTarget()255     bool isGenXTarget() {
256 #ifdef ISPC_GENX_ENABLED
257         return m_isa == Target::GENX;
258 #else
259         return false;
260 #endif
261     }
262 
263 #ifdef ISPC_GENX_ENABLED
264     GENX_PLATFORM getGenxPlatform() const;
265     uint32_t getGenxGrfSize() const;
266     bool hasGenxPrefetch() const;
267 #endif
268 
getArch()269     Arch getArch() const { return m_arch; }
270 
is32Bit()271     bool is32Bit() const { return m_is32Bit; }
272 
getCPU()273     std::string getCPU() const { return m_cpu; }
274 
getNativeVectorWidth()275     int getNativeVectorWidth() const { return m_nativeVectorWidth; }
276 
getNativeVectorAlignment()277     int getNativeVectorAlignment() const { return m_nativeVectorAlignment; }
278 
getDataTypeWidth()279     int getDataTypeWidth() const { return m_dataTypeWidth; }
280 
getVectorWidth()281     int getVectorWidth() const { return m_vectorWidth; }
282 
getGeneratePIC()283     bool getGeneratePIC() const { return m_generatePIC; }
284 
getMaskingIsFree()285     bool getMaskingIsFree() const { return m_maskingIsFree; }
286 
getMaskBitCount()287     int getMaskBitCount() const { return m_maskBitCount; }
288 
hasHalf()289     bool hasHalf() const { return m_hasHalf; }
290 
hasRand()291     bool hasRand() const { return m_hasRand; }
292 
hasGather()293     bool hasGather() const { return m_hasGather; }
294 
hasScatter()295     bool hasScatter() const { return m_hasScatter; }
296 
hasTranscendentals()297     bool hasTranscendentals() const { return m_hasTranscendentals; }
298 
hasTrigonometry()299     bool hasTrigonometry() const { return m_hasTrigonometry; }
300 
hasRsqrtd()301     bool hasRsqrtd() const { return m_hasRsqrtd; }
302 
hasRcpd()303     bool hasRcpd() const { return m_hasRcpd; }
304 
hasVecPrefetch()305     bool hasVecPrefetch() const { return m_hasVecPrefetch; }
306 
hasSatArith()307     bool hasSatArith() const { return m_hasSaturatingArithmetic; }
308 
hasFp64Support()309     bool hasFp64Support() const { return m_hasFp64Support; }
310 
warnFtoU32IsExpensive()311     bool warnFtoU32IsExpensive() const { return m_warnFtoU32IsExpensive; }
312 
313   private:
314     /** llvm Target object representing this target. */
315     const llvm::Target *m_target;
316 
317     /** llvm TargetMachine.
318         Note that it's not destroyed during Target destruction, as
319         Module::CompileAndOutput() uses TargetMachines after Target is destroyed.
320         This needs to be changed. */
321     llvm::TargetMachine *m_targetMachine;
322     llvm::DataLayout *m_dataLayout;
323 
324     /** flag to report invalid state after construction
325         (due to bad parameters passed to constructor). */
326     bool m_valid;
327 
328     /** ISPC target being used */
329     ISPCTarget m_ispc_target;
330 
331     /** Instruction set being compiled to. */
332     ISA m_isa;
333 
334     /** Target system architecture.  (e.g. "x86-64", "x86"). */
335     Arch m_arch;
336 
337     /** Is the target architecture 32 or 64 bit */
338     bool m_is32Bit;
339 
340     /** Target CPU. (e.g. "corei7", "corei7-avx", ..) */
341     std::string m_cpu;
342 
343     /** Target-specific attribute string to pass along to the LLVM backend */
344     std::string m_attributes;
345 
346     /** Target-specific function attributes */
347     std::vector<std::pair<std::string, std::string>> m_funcAttributes;
348 
349     /** Target-specific LLVM attribute, which has to be attached to every
350         function to ensure that it is generated for correct target architecture.
351         This is requirement was introduced in LLVM 3.3 */
352     llvm::AttrBuilder *m_tf_attributes;
353 
354     /** Native vector width of the vector instruction set.  Note that this
355         value is directly derived from the ISA being used (e.g. it's 4 for
356         SSE, 8 for AVX, etc.) */
357     int m_nativeVectorWidth;
358 
359     /** Native vector alignment in bytes. Theoretically this may be derived
360         from the vector size, but it's better to manage directly the alignement.
361         It allows easier experimenting and better fine tuning for particular
362         platform. This information is primatily used when
363         --opt=force-aligned-memory is used. */
364     int m_nativeVectorAlignment;
365 
366     /** Data type width in bits. Typically it's 32, but could be 8, 16 or 64. */
367     int m_dataTypeWidth;
368 
369     /** Actual vector width currently being compiled to.  This may be an
370         integer multiple of the native vector width, for example if we're
371         "doubling up" and compiling 8-wide on a 4-wide SSE system. */
372     int m_vectorWidth;
373 
374     /** Indicates whether position independent code should be generated. */
375     bool m_generatePIC;
376 
377     /** Is there overhead associated with masking on the target
378         architecture; e.g. there is on SSE, due to extra blends and the
379         like, but there isn't with an ISA that supports masking
380         natively. */
381     bool m_maskingIsFree;
382 
383     /** How many bits are used to store each element of the mask: e.g. this
384         is 32 on SSE/AVX, since that matches the HW better. */
385     int m_maskBitCount;
386 
387     /** Indicates whether the target has native support for float/half
388         conversions. */
389     bool m_hasHalf;
390 
391     /** Indicates whether there is an ISA random number instruction. */
392     bool m_hasRand;
393 
394     /** Indicates whether the target has a native gather instruction */
395     bool m_hasGather;
396 
397     /** Indicates whether the target has a native scatter instruction */
398     bool m_hasScatter;
399 
400     /** Indicates whether the target has support for transcendentals (beyond
401         sqrt, which we assume that all of them handle). */
402     bool m_hasTranscendentals;
403 
404     /** Indicates whether the target has ISA support for trigonometry */
405     bool m_hasTrigonometry;
406 
407     /** Indicates whether there is an ISA double precision rsqrt. */
408     bool m_hasRsqrtd;
409 
410     /** Indicates whether there is an ISA double precision rcp. */
411     bool m_hasRcpd;
412 
413     /** Indicates whether the target has hardware instruction for vector prefetch. */
414     bool m_hasVecPrefetch;
415 
416     /** Indicates whether the target has special saturating arithmetic instructions. */
417     bool m_hasSaturatingArithmetic;
418 
419     /** Indicates whether the target has FP64 support. */
420     bool m_hasFp64Support;
421 
422     /** Indicates whether the target has uint32 -> float cvt support **/
423     bool m_warnFtoU32IsExpensive;
424 };
425 
/** @brief Structure that collects optimization options

    This structure collects all of the options related to optimization of
    generated code.  The constructor is defined outside this header.
*/
struct Opt {
    Opt();

    /** Optimization level.  Currently, the only valid values are 0,
        indicating essentially no optimization, and 1, indicating as much
        optimization as possible. */
    int level;

    /** Indicates whether "fast and loose" numerically unsafe optimizations
        should be performed.  This is false by default. */
    bool fastMath;

    /** Indicates whether a vector load should be issued for masked loads
        on platforms that don't have a native masked vector load.  (This may
        lead to accessing memory up to programCount-1 elements past the end of
        arrays, so is unsafe in general.) */
    bool fastMaskedVload;

    /** Indicates whether loops should be unrolled (when doing so seems like
        it will make sense). */
    bool unrollLoops;

    /** Indicates if addressing math will be done with 32-bit math, even on
        64-bit systems.  (This is generally noticeably more efficient,
        though at the cost of addressing >2GB).
     */
    bool force32BitAddressing;

    /** Indicates whether Assert() statements should be ignored (for
        performance in the generated code). */
    bool disableAsserts;

    /** Indicates whether FMA instructions should be disabled (on targets
        that support them). */
    bool disableFMA;

    /** Always generate aligned vector load/store instructions; this
        implies a guarantee that all dynamic access through pointers that
        becomes a vector load/store will be a cache-aligned sequence of
        locations. */
    bool forceAlignedMemory;

    /** If enabled, disables the various optimizations that kick in when
        the execution mask can be determined to be "all on" at compile
        time. */
    bool disableMaskAllOnOptimizations;

    /** If enabled, the various __pseudo* memory ops (gather/scatter,
        masked load/store) are left in their __pseudo* form, for better
        understanding of the structure of generated code when reading
        it. */
    bool disableHandlePseudoMemoryOps;

    /** On targets that don't have a masked store instruction but do have a
        blending instruction, by default, we simulate masked stores by
        loading the old value, blending, and storing the result.  This can
        potentially be unsafe in multi-threaded code, in that it writes to
        locations that aren't supposed to be written to.  Setting this
        value to true disables this work-around, and instead implements
        masked stores by 'scalarizing' them, so that we iterate over the
        SIMD lanes and do a scalar write for the ones that are running. */
    bool disableBlendedMaskedStores;

    /** Disables the 'coherent control flow' constructs in the
        language. (e.g. this causes "cif" statements to be demoted to "if"
        statements.)  This is likely only useful for measuring the impact
        of coherent control flow. */
    bool disableCoherentControlFlow;

    /** Disables uniform control flow optimizations (e.g. this changes an
        "if" statement with a uniform condition to have a varying
        condition).  This is likely only useful for measuring the impact of
        uniform control flow. */
    bool disableUniformControlFlow;

    /** Disables the backend optimizations related to gather/scatter
        (e.g. transforming gather from sequential locations to an unaligned
        load, etc.)  This is likely only useful for measuring the impact of
        these optimizations. */
    bool disableGatherScatterOptimizations;

    /** Disables the optimization that demotes masked stores to regular
        stores when the store is happening at the same control flow level
        where the variable was declared.  This is likely only useful for
        measuring the impact of this optimization. */
    bool disableMaskedStoreToStore;

    /** Disables the optimization that detects when the execution mask is
        all on and emits code for gathers and scatters that doesn't loop
        over the SIMD lanes but just does the scalar loads and stores
        directly. */
    bool disableGatherScatterFlattening;

    /** Disables the optimizations that detect when arrays are being
        indexed with 'uniform' values and issue scalar loads/stores rather
        than gathers/scatters.  This is likely only useful for measuring
        the impact of this optimization. */
    bool disableUniformMemoryOptimizations;

    /** Disables optimizations that coalesce incoherent scalar memory
        access from gathers into wider vector operations, when possible. */
    bool disableCoalescing;

    /** Disable using zmm registers for avx512 target in favour of ymm.
        Affects only >= 512 bit wide targets and only if avx512vl is available */
    bool disableZMM;

#ifdef ISPC_GENX_ENABLED
    /** Disables optimization that coalesce gathers on GenX. This is
        likely only useful for measuring the impact of this optimization */
    bool disableGenXGatherCoalescing;

    /** Enables experimental support of foreach statement inside varying CF.
        Current implementation brings performance degradation due to ineffective
        implementation of unmasked. */
    bool enableForeachInsideVarying;

    /** Enables emitting of genx.any intrinsics and the control flow which is
        based on implicit hardware mask. Forces generation of goto/join instructions
        in assembly. */
    bool emitGenXHardwareMask;

    /** Enables generation of masked loads implemented using svm loads which
     * may lead to out of bound reads but bring performance improvement in
     * most of the cases.
     */
    bool enableGenXUnsafeMaskedLoad;
#endif
};
560 
561 /** @brief This structure collects together a number of global variables.
562 
563     This structure collects a number of global variables that mostly
564     represent parameter settings for this compilation run.  In particular,
565     none of these values should change after compilation befins; their
566     values are all set during command-line argument processing or very
567     early during the compiler's execution, before any files are parsed.
568   */
569 struct Globals {
570     Globals();
571 
572     /** TargetRegistry holding all stdlib bitcode. */
573     TargetLibRegistry *target_registry;
574 
575     /** Optimization option settings */
576     Opt opt;
577 
578     /** Compilation target information */
579     Target *target;
580 
581     /** Target OS */
582     TargetOS target_os;
583 
584     /** Function Calling Convention */
585     CallingConv calling_conv;
586 
587     /** There are a number of math libraries that can be used for
588         transcendentals and the like during program compilation. */
589     enum MathLib { Math_ISPC, Math_ISPCFast, Math_SVML, Math_System };
590     MathLib mathLib;
591 
592     /** Optimization level to be specified while creating TargetMachine. */
593     enum CodegenOptLevel { None, Aggressive };
594     CodegenOptLevel codegenOptLevel;
595 
596     /** Records whether the ispc standard library should be made available
597         to the program during compilations. (Default is true.) */
598     bool includeStdlib;
599 
600     /** Indicates whether the C pre-processor should be run over the
601         program source before compiling it.  (Default is true.) */
602     bool runCPP;
603 
604     /** When \c true, voluminous debugging output will be printed during
605         ispc's execution. */
606     bool debugPrint;
607 
608     /** When \c true, target ISA will be printed during ispc's execution. */
609     bool printTarget;
610 
611     /** When \c true, LLVM won't omit frame pointer. */
612     bool NoOmitFramePointer;
613 
614     /** Indicates which stages of optimization we want to dump. */
615     std::set<int> debug_stages;
616 
617     /** Whether to dump IR to file. */
618     bool dumpFile;
619 
620     /** Indicates after which optimization we want to generate
621         DebugIR information. */
622     int debugIR;
623 
624     /** Indicates which phases of optimization we want to switch off. */
625     std::set<int> off_stages;
626 
627     /** Indicates whether all warning messages should be surpressed. */
628     bool disableWarnings;
629 
630     /** Indicates whether warnings should be issued as errors. */
631     bool warningsAsErrors;
632 
633     /** Indicates whether line wrapping of error messages to the terminal
634         width should be disabled. */
635     bool disableLineWrap;
636 
637     /** Indicates whether additional warnings should be issued about
638         possible performance pitfalls. */
639     bool emitPerfWarnings;
640 
641     /** Indicates whether all printed output should be surpressed. */
642     bool quiet;
643 
644     /** Always use ANSI escape sequences to colorize warning and error
645         messages, even if piping output to a file, etc. */
646     bool forceColoredOutput;
647 
648     /** Indicates whether calls should be emitted in the program to an
649         externally-defined program instrumentation function. (See the
650         "Instrumenting your ispc programs" section in the user's
651         manual.) */
652     bool emitInstrumentation;
653 
654 #ifdef ISPC_GENX_ENABLED
655     /** Arguments to pass to Vector Compiler backend for offline
656     compilation to L0 binary */
657     std::string vcOpts;
658 #endif
659 
660     bool noPragmaOnce;
661 
662     /** Indicates whether ispc should generate debugging symbols for the
663         program in its output. */
664     bool generateDebuggingSymbols;
665 
666     /** Require generation of DWARF of certain version (2, 3, 4). For
667         default version, this field is set to 0. */
668     // Hint: to verify dwarf version in the object file, run on Linux:
669     // readelf --debug-dump=info object.o | grep -A 2 'Compilation Unit @'
670     // on Mac:
671     // xcrun dwarfdump -r0 object.o
672     int generateDWARFVersion;
673 
674     /** If true, function names are mangled by appending the target ISA and
675         vector width to them. */
676     bool mangleFunctionsWithTarget;
677 
678     /** If enabled, the lexer will randomly replace some tokens returned
679         with other tokens, in order to test error condition handling in the
680         compiler. */
681     bool enableFuzzTest;
682 
683     /* If enabled, allows the user to directly call LLVM intrinsics. */
684     bool enableLLVMIntrinsics;
685 
686     /** Seed for random number generator used for fuzz testing. */
687     int fuzzTestSeed;
688 
689     /** Global LLVMContext object */
690     llvm::LLVMContext *ctx;
691 
692     /** Current working directory when the ispc compiler starts
693         execution. */
694     char currentDirectory[1024];
695 
696     /** Arguments to pass along to the C pre-processor, if it is run on the
697         program before compilation. */
698     std::vector<std::string> cppArgs;
699 
700     /** Additional user-provided directories to search when processing
701         #include directives in the preprocessor. */
702     std::vector<std::string> includePath;
703 
704     /** Indicates that alignment in memory allocation routines should be
705         forced to have given value. -1 value means natural alignment for the platforms. */
706     int forceAlignment;
707 
708     /** When true, flag non-static functions with dllexport attribute on Windows. */
709     bool dllExport;
710 
711     /** Lines for which warnings are turned off. */
712     std::map<std::pair<int, std::string>, bool> turnOffWarnings;
713 
714     enum pragmaUnrollType { none, nounroll, unroll, count };
715 
716     /* If true, we are compiling for more than one target. */
717     bool isMultiTargetCompilation;
718 
719     /* Number of errors to show in ISPC. */
720     int errorLimit;
721 
722     /* When true, enable compile time tracing. */
723     bool enableTimeTrace;
724 
725     /* When compile time tracing is enabled, set time granularity. */
726     int timeTraceGranularity;
727 };
728 
/** Integer cost constants, one per language construct or operation, plus a
    few threshold values at the end.  Several enumerators deliberately share
    the same numeric value.  The code that consumes these values lives
    outside this header. */
enum {
    // Per-operation costs
    COST_ASSIGN = 1,
    COST_COMPLEX_ARITH_OP = 4,
    COST_DELETE = 32,
    COST_DEREF = 4,
    COST_FUNCALL = 4,
    COST_FUNPTR_UNIFORM = 12,
    COST_FUNPTR_VARYING = 24,
    COST_GATHER = 8,
    COST_GOTO = 4,
    COST_LOAD = 2,
    COST_NEW = 32,
    COST_BREAK_CONTINUE = 3,
    COST_RETURN = 4,
    COST_SELECT = 4,
    COST_SIMPLE_ARITH_LOGIC_OP = 1,
    COST_SYNC = 32,
    COST_TASK_LAUNCH = 32,
    COST_TYPECAST_COMPLEX = 4,
    COST_TYPECAST_SIMPLE = 1,
    COST_UNIFORM_IF = 2,
    COST_VARYING_IF = 3,
    COST_UNIFORM_LOOP = 4,
    COST_VARYING_LOOP = 6,
    COST_UNIFORM_SWITCH = 4,
    COST_VARYING_SWITCH = 12,
    COST_ASSERT = 8,

    // Threshold-style constants
    CHECK_MASK_AT_FUNCTION_START_COST = 16,
    PREDICATE_SAFE_IF_STATEMENT_COST = 6,
    // For gen target we want to avoid branches as much as possible
    // so we use increased cost here
    PREDICATE_SAFE_SHORT_CIRC_GENX_STATEMENT_COST = 10,
};
763 
764 extern Globals *g;
765 extern Module *m;
766 } // namespace ispc
767