1 // Copyright Contributors to the Open Shading Language project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
4 
5 #pragma once
6 
7 #include <OSL/export.h>
8 #include <OSL/oslversion.h>
9 #include <OSL/oslconfig.h>
10 
11 #include <vector>
12 #include <unordered_set>
13 
14 #ifdef LLVM_NAMESPACE
15 namespace llvm = LLVM_NAMESPACE;
16 #endif
17 
namespace llvm {
// Forward declarations only.  They let this header name LLVM types through
// pointers and references without pulling in any LLVM headers.
class BasicBlock;
class ConstantFolder;
class DIBuilder;
class DICompileUnit;
class DIFile;
class DILocation;
class DIScope;
class DISubprogram;
class DISubroutineType;
class ExecutionEngine;
class Function;
class FunctionType;
class JITEventListener;
class Linker;
class LLVMContext;
class Module;
class PointerType;
class SectionMemoryManager;
class Type;
class Value;
class VectorType;

namespace legacy {
class FunctionPassManager;
class PassManager;
}  // namespace legacy
}  // namespace llvm
46 
47 
48 
49 OSL_NAMESPACE_ENTER
50 
51 namespace pvt {   // OSL::pvt
52 
53 
// Instruction-set targets that the JIT can be asked to generate code for.
// Enumerator values are implicit and sequential, starting at 0.
enum class TargetISA {
    UNKNOWN,       // nothing selected/detected yet
    NONE,          // no specific ISA requested
    x64,
    SSE4_2,
    AVX,
    AVX2,
    AVX2_noFMA,
    AVX512,
    AVX512_noFMA,
    HOST,          // use whatever the host machine is
    COUNT          // keep last: equals the number of entries above
};
68 
69 
70 
71 /// Wrapper class around LLVM functionality.  This handles all the
72 /// gory details of actually dealing with LLVM.  It should be sufficiently
73 /// generic that it would be useful for any LLVM-JITing app, and is not
74 /// tied to OSL internals at all.
75 class OSLEXECPUBLIC LLVM_Util {
76 public:
77     struct OSLEXECPUBLIC PerThreadInfo {
PerThreadInfoPerThreadInfo78         PerThreadInfo() {}
79         ~PerThreadInfo();
80     private:
81         friend class LLVM_Util;
82         struct Impl;
83         mutable Impl* m_thread_info = nullptr;
84         Impl* get() const;
85     };
86 
87     LLVM_Util (const PerThreadInfo &per_thread_info,
88                int debuglevel = 0, int vector_width = 4);
89     ~LLVM_Util ();
90 
91     // JIT'd code needs to exist with a longer lifetime than the LLVM_Util object.
92     // To enable better cleanup at shutdown, the lifetime of all JIT'd code is
    // controlled by the existence of ScopedJitMemoryUser objects.
94     // When the last ScopedJitMemoryUser goes out of scope or is deleted,
95     // then the underlying memory managers will be deleted
96     struct OSLEXECPUBLIC ScopedJitMemoryUser {
97         ScopedJitMemoryUser();
98         ~ScopedJitMemoryUser();
99     };
100 
101     /// Set debug level
debug(int d)102     void debug (int d) { m_debug = d; }
debug()103     int debug () const { return m_debug; }
dumpasm(bool val)104     void dumpasm(bool val) { m_dumpasm = val; }
dumpasm()105     bool dumpasm() const { return m_dumpasm; }
jit_fma(bool val)106     void jit_fma(bool val) { m_jit_fma = val; }
jit_fma()107     bool jit_fma() const { return m_jit_fma; }
jit_aggressive(bool val)108     void jit_aggressive(bool val) { m_jit_aggressive = val; }
jit_aggressive()109     bool jit_aggressive() const { return m_jit_aggressive; }
110 
111     /// Return a reference to the current context.
context()112     llvm::LLVMContext &context () const { return *m_llvm_context; }
113 
114     /// Return a pointer to the current module.  Make a new one if
115     /// necessary.
module()116     llvm::Module *module () {
117         if (! m_llvm_module)
118             m_llvm_module = new_module();
119         return m_llvm_module;
120     }
121 
122     /// Set the current module to m.
module(llvm::Module * m)123     void module (llvm::Module *m) {
124         m_llvm_module = m;
125         m_ModuleIsFinalized = false;
126         m_ModuleIsPruned = false;
127     }
128 
129     /// Create a new empty module.
130     llvm::Module *new_module (const char *id = "default");
131 
132     /// Create a new module, populated with functions from the buffer
133     /// bitcode[0..size-1].  The name identifies the buffer.  If err is not
134     /// NULL, error messages will be stored there.
135     llvm::Module *module_from_bitcode (const char *bitcode, size_t size,
136                                        const std::string &name=std::string(),
137                                        std::string *err=NULL);
138 
139     bool debug_is_enabled() const;
140     void debug_setup_compilation_unit(const char * compile_unit_name);
141     void debug_push_function(const std::string & function_name,
142                              OIIO::ustring sourcefile, int sourceline);
143     void debug_pop_function();
144     void debug_push_inlined_function(OIIO::ustring function_name,
145                              OIIO::ustring sourcefile, int sourceline);
146     void debug_pop_inlined_function();
147     void debug_set_location(OIIO::ustring sourcefile, int sourceline);
148 
149     /// Create a new function (that will later be populated with
150     /// instructions) with up to 4 args.
151     llvm::Function *make_function (const std::string &name, bool fastcall,
152                                    llvm::Type *rettype,
153                                    llvm::Type *arg1=NULL,
154                                    llvm::Type *arg2=NULL,
155                                    llvm::Type *arg3=NULL,
156                                    llvm::Type *arg4=NULL);
157 
158     /// Create a new function (that will later be populated with
159     /// instructions) with a vector of args.
160     llvm::Function *make_function (const std::string &name, bool fastcall,
161                                    llvm::Type *rettype,
162                                    const std::vector<llvm::Type*> &paramtypes,
163                                    bool varargs=false);
164 
165     /// Add a global mapping of a function to its callable address
166     /// explicitly instead of relying on dlsym.
167     void add_function_mapping (llvm::Function *func, void *addr);
168 
169     /// Set up a new current function that subsequent basic blocks will
170     /// be added to.
current_function(llvm::Function * func)171     void current_function (llvm::Function *func) { m_current_function = func; }
172 
173     /// Return a ptr to the current function we're generating.
current_function()174     llvm::Function *current_function () const { return m_current_function; }
175 
176     /// Return the value ptr for the a-th argument of the current function.
177     llvm::Value *current_function_arg (int a);
178 
179 
180     /// Create a new IR builder with the given block as entry point. If
181     /// block is NULL, a new basic block for the current function will be
182     /// created.
183     void new_builder (llvm::BasicBlock *block=NULL);
184 
185     /// End the current builder
186     void end_builder ();
187 
188     /// Create a new JITing ExecutionEngine and make it the current one.
189     /// Return a pointer to the new engine.  If err is not NULL, put any
190     /// errors there.
191     /// Optionally request a specific ISA for JIT on the host
192     ///     ["x64", "SSE4.2", "AVX", "AVX2", "AVX512"]
193     ///     (ignored if requested ISA not valid for host)
194     /// Optionally enable debugging symbols (source file & line number)
195     /// Optionally enable profiling events
196     llvm::ExecutionEngine* make_jit_execengine (std::string *err = nullptr,
197                          TargetISA requestedISA = TargetISA::NONE,
198                          bool debugging_symbols = false,
199                          bool profiling_events = false);
200 
201     /// Report the host's TargetISA as chosen by the last call to
202     /// make_jit_execengine() or to detect_cpu_features(). Don't call
203     /// target_isa() unless one of those has previously been called.
target_isa()204     TargetISA target_isa() const { return m_target_isa; }
205 
206     // Check support for certain CPU ISA features. These are only valid
207     // after detect_cpu_features() (or make_jit_execengine()) has been
208     // called.
supports_avx()209     bool supports_avx() const { return m_supports_avx; }
supports_avx2()210     bool supports_avx2() const { return m_supports_avx2; }
supports_avx512f()211     bool supports_avx512f() const { return m_supports_avx512f; }
supports_llvm_bit_masks_natively()212     bool supports_llvm_bit_masks_natively() const { return m_supports_llvm_bit_masks_natively; }
supports_masked_stores()213     bool supports_masked_stores() const { return m_supports_masked_stores; }
214 
215     // Does this host support the requested target ISA?
216     static bool supports_isa(TargetISA target);
217 
218     // Look up the TargetISA enum by name. Special names: "host" means
219     // figure out what this host is, "none" or "" mean to use the baseline
220     // or no special ops not availabe on all flavors of this family of CPUs.
221     static TargetISA lookup_isa_by_name(string_view target_name = "");
222 
223     // Name for this TargetISA enum.
224     static const char* target_isa_name(TargetISA isa);
225 
226     // For CPU compilation, inventory the host CPU capabilities. You can
227     // optionally request a specific ISA by name. If `no_fma` is true,
228     // specifically pretend there is no FMA capability, even if the hardware
229     // supports it. Return true if ok, false if it couldn't figure it out.
230     bool detect_cpu_features(TargetISA requestedISA = TargetISA::UNKNOWN,
231                              bool no_fma = false);
232 
233     /// Return a pointer to the current ExecutionEngine.  Create a JITing
234     /// ExecutionEngine if one isn't already set up.
execengine()235     llvm::ExecutionEngine *execengine () {
236         if (! m_llvm_exec)
237             make_jit_execengine();
238         return m_llvm_exec;
239     }
240 
241     void dump_struct_data_layout(llvm::Type *Ty);
242     void validate_struct_data_layout(llvm::Type *Ty,
243               const std::vector<unsigned int>& expected_offset_by_index);
244 
245     /// Replace the ExecutionEngine (pass NULL to simply delete the
246     /// current one).
247     void execengine (llvm::ExecutionEngine *exec);
248 
249     enum class Linkage {
250         External, // Externally visible
251         LinkOnceODR, // One Definition Rule:  Inline version, but allow replacement by equivalent.
252         Internal, // Treat as static functions.
253         Private // Treat as static functions, but omit from symbol table.
254     };
255 
256     /// Identify exactly which functions need to exist in the module based
257     /// on actual usage from a set of external_functions. All unneeded
258     /// functions are removed before we move to optimization which is much
259     /// faster.  The external_functions will be set to have external linkage
260     /// and the remaining functions set to internal linkage.
261     void prune_and_internalize_module (
262         std::unordered_set<llvm::Function*> external_functions,
263         Linkage default_linkage = Linkage::Internal,
264         std::string *out_err = nullptr);
265 
266     // OLD, might deprecate later
267     /// Change symbols in the module that are marked as having external
268     /// linkage to an alternate linkage that allows them to be discarded if
269     /// not used within the module. Only do this for functions that start
270     /// with prefix, and that DON'T match anything in the two exceptions
271     /// lists.
272     void internalize_module_functions (const std::string &prefix,
273                                        const std::vector<std::string> &exceptions,
274                                        const std::vector<std::string> &moreexceptions);
275 
276     /// Setup LLVM optimization passes.
277     /// if targetHost is true, passes to target the host will be added
278     void setup_optimization_passes (int optlevel, bool target_host=true);
279 
280     /// Run the optimization passes.
281     void do_optimize (std::string *err = NULL);
282 
283     /// Retrieve a callable pointer to the JITed version of a function.
284     /// This will JIT the function if it hasn't already done so. Be sure
285     /// you have already called do_optimize() if you want optimization.
286     void *getPointerToFunction (llvm::Function *func);
287 
288     /// Wrap ExecutionEngine::InstallLazyFunctionCreator.
289     void InstallLazyFunctionCreator (void* (*P)(const std::string &));
290 
291 
292     /// Create a new LLVM basic block (for the current function) and return
293     /// its handle.
294     llvm::BasicBlock *new_basic_block (const std::string &name=std::string());
295 
296     /// Save the return block pointer when entering a function. If
297     /// after==NULL, generate a new basic block for where to go after the
298     /// function return.  Return the after BB.
299     llvm::BasicBlock *push_function (llvm::BasicBlock *after=NULL);
300 
301     /// Pop basic return destination when exiting a function.  This includes
302     /// resetting the IR insertion point to the block following the
303     /// corresponding function call.
304     void pop_function ();
305 
306     /// Are we inside a function?
307     bool inside_function() const;
308 
309     /// Return the basic block where we go after returning from the current
310     /// function.
311     llvm::BasicBlock *return_block () const;
312 
313     /// Save the basic block pointers when entering a loop.
314     void push_loop (llvm::BasicBlock *step, llvm::BasicBlock *after);
315 
316     /// Pop basic block pointers when exiting a loop.
317     void pop_loop ();
318 
319     /// Return the basic block of the current loop's 'step' instructions.
320     llvm::BasicBlock *loop_step_block () const;
321 
322     /// Return the basic block of the current loop's exit point.
323     llvm::BasicBlock *loop_after_block () const;
324 
325 
type_float()326     llvm::Type *type_float() const { return m_llvm_type_float; }
type_double()327     llvm::Type *type_double() const { return m_llvm_type_double; }
type_int()328     llvm::Type *type_int() const { return m_llvm_type_int; }
type_int8()329     llvm::Type *type_int8() const { return m_llvm_type_int8; }
type_int16()330     llvm::Type *type_int16() const { return m_llvm_type_int16; }
type_addrint()331     llvm::Type *type_addrint() const { return m_llvm_type_addrint; }
type_bool()332     llvm::Type *type_bool() const { return m_llvm_type_bool; }
type_char()333     llvm::Type *type_char() const { return m_llvm_type_char; }
type_longlong()334     llvm::Type *type_longlong() const { return m_llvm_type_longlong; }
type_void()335     llvm::Type *type_void() const { return m_llvm_type_void; }
type_triple()336     llvm::Type *type_triple() const { return m_llvm_type_triple; }
type_matrix()337     llvm::Type *type_matrix() const { return m_llvm_type_matrix; }
type_typedesc()338     llvm::Type *type_typedesc() const { return m_llvm_type_longlong; }
type_void_ptr()339     llvm::PointerType *type_void_ptr() const { return m_llvm_type_void_ptr; }
type_string()340     llvm::PointerType *type_string() { return m_llvm_type_char_ptr; }
type_ustring_ptr()341     llvm::PointerType *type_ustring_ptr() const { return m_llvm_type_ustring_ptr; }
type_char_ptr()342     llvm::PointerType *type_char_ptr() const { return m_llvm_type_char_ptr; }
type_bool_ptr()343     llvm::PointerType *type_bool_ptr() const { return m_llvm_type_bool_ptr; }
type_int_ptr()344     llvm::PointerType *type_int_ptr() const { return m_llvm_type_int_ptr; }
type_float_ptr()345     llvm::PointerType *type_float_ptr() const { return m_llvm_type_float_ptr; }
type_longlong_ptr()346     llvm::PointerType *type_longlong_ptr() const { return m_llvm_type_longlong_ptr; }
type_triple_ptr()347     llvm::PointerType *type_triple_ptr() const { return m_llvm_type_triple_ptr; }
type_matrix_ptr()348     llvm::PointerType *type_matrix_ptr() const { return m_llvm_type_matrix_ptr; }
type_double_ptr()349     llvm::PointerType *type_double_ptr() const { return m_llvm_type_double_ptr; }
350 
type_wide_float()351     llvm::Type *type_wide_float() const { return m_llvm_type_wide_float; }
type_wide_double()352     llvm::Type *type_wide_double() const { return m_llvm_type_wide_double; }
type_wide_int()353     llvm::Type *type_wide_int() const { return m_llvm_type_wide_int; }
type_wide_bool()354     llvm::Type *type_wide_bool() const { return m_llvm_type_wide_bool; }
type_wide_char()355     llvm::Type *type_wide_char() const { return m_llvm_type_wide_char; }
type_wide_longlong()356     llvm::Type *type_wide_longlong() const { return m_llvm_type_wide_longlong; }
type_wide_triple()357     llvm::Type *type_wide_triple() const { return m_llvm_type_wide_triple; }
type_wide_matrix()358     llvm::Type *type_wide_matrix() const { return m_llvm_type_wide_matrix; }
type_wide_void_ptr()359     llvm::Type *type_wide_void_ptr() const { return m_llvm_type_wide_void_ptr; }
type_wide_string()360     llvm::Type *type_wide_string() const { return m_llvm_type_wide_ustring_ptr; }
type_wide_char_ptr()361     llvm::PointerType *type_wide_char_ptr() const { return m_llvm_type_wide_char_ptr; }
type_wide_bool_ptr()362     llvm::PointerType *type_wide_bool_ptr() const { return m_llvm_type_wide_bool_ptr; }
type_wide_int_ptr()363     llvm::PointerType *type_wide_int_ptr() const { return m_llvm_type_wide_int_ptr; }
type_wide_float_ptr()364     llvm::PointerType *type_wide_float_ptr() const { return m_llvm_type_wide_float_ptr; }
365 
366     // Different ISA's may have different representations of a mask from
367     // llvm's vector of bits that comparison operations emit. And we need
368     // varying boolean symbols to have the correct data type (size) on the
369     // stack and in data structures.
type_native_mask()370     llvm::Type *type_native_mask() const { return m_llvm_type_native_mask; }
371 
372     /// Generate the appropriate llvm type definition for a TypeDesc
373     /// (this is the actual type, for example when we allocate it).
374     llvm::Type *llvm_type (const OIIO::TypeDesc &typedesc);
375 
376     /// Generate the appropriate llvm vector type definition for a TypeDesc
377     /// (this is the actual type, for example when we allocate it).
378     llvm::Type *llvm_vector_type (const OIIO::TypeDesc &typedesc);
379 
380     /// This will return a llvm::Type that is the same as a C union of
381     /// the given types[].
382     llvm::Type *type_union (const std::vector<llvm::Type *> &types);
383 
384     /// This will return a llvm::Type that is the same as a C struct
385     /// comprised fields of the given types[], in order.
386     llvm::Type *type_struct (const std::vector<llvm::Type *> &types,
387                              const std::string &name="", bool is_packed=false);
388 
389 
390     /// Return the llvm::Type that is a pointer to the given llvm type.
391     llvm::Type *type_ptr (llvm::Type *type);
392 
393     /// Return the llvm::Type that is an array of n elements of the given
394     /// llvm type.
395     llvm::Type *type_array (llvm::Type *type, int n);
396 
397     /// Return an llvm::FunctionType that describes a function with the
398     /// given return types, parameter types (in a vector), and whether it
399     /// uses varargs conventions.
400     llvm::FunctionType *type_function (llvm::Type *rettype,
401                                        const std::vector<llvm::Type*> &params,
402                                        bool varargs=false);
403 
404     /// Return a llvm::PointerType that's a pointer to the described
405     /// kind of function.
406     llvm::PointerType *type_function_ptr (llvm::Type *rettype,
407                                           const std::vector<llvm::Type*> &params,
408                                           bool varargs=false);
409 
410     /// Return the human-readable name of the type of the llvm type.
411     std::string llvm_typename (llvm::Type *type) const;
412 
413     /// Return the llvm::Type of the llvm value.
414     llvm::Type *llvm_typeof (llvm::Value *val) const;
415 
416     /// Return the human-readable name of the type of the llvm value.
417     std::string llvm_typenameof (llvm::Value *val) const;
418 
419     /// Return an llvm::Value holding the given floating point constant.
420     llvm::Value *constant (float f);
421 
422     /// Return an llvm::Value holding the given integer constant.
423     llvm::Value *constant (int i);
424 
425     /// Return an llvm::Value holding the given integer constant.
426     llvm::Value *constant8 (int i);
427     llvm::Value *constant16 (uint16_t i);
428     llvm::Value *constant64 (uint64_t i);
429     llvm::Value *constant128 (uint64_t i);
430     llvm::Value *constant128 (uint64_t left, uint64_t right);
431 
432     /// Return an llvm::Value holding the given size_t constant.
433     llvm::Value *constant (size_t i);
434 
435     /// Return an llvm::Value holding the given bool constant.
436     /// Change the name so it doesn't get mixed up with int.
437     llvm::Value *constant_bool (bool b);
438 
439     /// Return a constant void pointer to the given constant address.
440     /// If the type specified is NULL, it will make a 'void *'.
441     llvm::Value *constant_ptr (void *p, llvm::PointerType *type=NULL);
442 
443     /// Return an llvm::Value holding the given string constant.
444     llvm::Value *constant (ustring s);
constant(string_view s)445     llvm::Value *constant (string_view s) {
446         return constant(ustring(s));
447     }
448 
449     /// Return an llvm::Value for a long long that is a packed
450     /// representation of a TypeDesc.
451     llvm::Value *constant (const OIIO::TypeDesc &type);
452 
453     // Return "wide" (SIMD vector) constants of various types.
454     llvm::Value *wide_constant (float f);
455     llvm::Value *wide_constant (int i);
456     llvm::Value *wide_constant (size_t i);
457     llvm::Value *wide_constant_bool (bool b);
458     llvm::Value *wide_constant (ustring s);
wide_constant(string_view s)459     llvm::Value *wide_constant (string_view s) {
460         return wide_constant(ustring(s));
461     }
462 
463     /// Return an llvm::Value holding wide version of the given scalar
464     /// constant.
465     llvm::Value *wide_constant (llvm::Value *constant_val);
466 
467     /// Return an llvm::Value for a void* variable with value NULL.
468     llvm::Value *void_ptr_null ();
469 
470     /// Cast the pointer variable specified by val to the kind of pointer
471     /// described by type (as an llvm pointer type).
472     llvm::Value *ptr_cast (llvm::Value* val, llvm::Type *type);
ptr_cast(llvm::Value * val,llvm::PointerType * type)473     llvm::Value *ptr_cast (llvm::Value* val, llvm::PointerType *type) {
474         return ptr_cast (val, (llvm::Type *)type);
475     }
476 
477     /// Cast the pointer variable specified by val to a pointer to the type
478     /// described by type (as an llvm data type).
479     llvm::Value *ptr_to_cast (llvm::Value* val, llvm::Type *type);
480 
481     /// Cast the pointer variable specified by val to a pointer to the given
482     /// data type, return the llvm::Value of the new pointer.
483     llvm::Value *ptr_cast (llvm::Value* val, const OIIO::TypeDesc &type);
484 
485     llvm::Value *wide_ptr_cast (llvm::Value* val, const OIIO::TypeDesc &type);
486 
487     /// Cast the variable specified by val to a pointer of type void*,
488     /// return the llvm::Value of the new pointer.
489     llvm::Value *int_to_ptr_cast (llvm::Value* val);
490 
491     /// Cast the pointer variable specified by val to a pointer of type
492     /// void* return the llvm::Value of the new pointer.
493     llvm::Value *void_ptr (llvm::Value* val);
494 
495     /// Generate a pointer that is (ptrtype)((char *)ptr + offset).
496     /// If ptrtype is NULL, just return a void*.
497     llvm::Value *offset_ptr (llvm::Value *ptr, int offset,
498                              llvm::Type *ptrtype=NULL);
499 
500     /// Generate an alloca instruction to allocate space for n copies of the
501     /// given llvm type, and return its pointer.
502     llvm::Value *op_alloca (llvm::Type *llvmtype, int n=1,
503                             const std::string &name=std::string(), int align=0);
504     llvm::Value *op_alloca (llvm::PointerType *llvmtype, int n=1,
505                             const std::string &name=std::string(), int align=0) {
506         return op_alloca ((llvm::Type *)llvmtype, n, name, align);
507     }
508 
509     /// Generate an alloca instruction to allocate space for n copies of the
510     /// given type, and return its pointer.
511     llvm::Value *op_alloca (const OIIO::TypeDesc &type, int n=1,
512                             const std::string &name=std::string(), int align=0);
513 
514     /// Generate an alloca instruction to allocate space for n copies of the
515     /// given type, and return its pointer.
516     llvm::Value *wide_op_alloca (const OIIO::TypeDesc &type, int n=1,
517                                  const std::string &name=std::string(), int align=0);
518 
519     /// Generate code for a call to the function pointer, with the given
520     /// arg list.  Return an llvm::Value* corresponding to the return
521     /// value of the function, if any.
522     llvm::Value *call_function (llvm::Value *func, cspan<llvm::Value *> args);
523     /// Generate code for a call to the named function with the given arg
524     /// list.  Return an llvm::Value* corresponding to the return value of
525     /// the function, if any.
526     llvm::Value *call_function (const char *name, cspan<llvm::Value *> args);
527 
call_function(const char * name,llvm::Value * arg0)528     llvm::Value *call_function (const char *name, llvm::Value *arg0) {
529         return call_function (name, cspan<llvm::Value*>(&arg0, 1));
530     }
call_function(const char * name,llvm::Value * arg0,llvm::Value * arg1)531     llvm::Value *call_function (const char *name, llvm::Value *arg0,
532                                 llvm::Value *arg1) {
533         return call_function (name, { arg0, arg1 });
534     }
call_function(const char * name,llvm::Value * arg0,llvm::Value * arg1,llvm::Value * arg2)535     llvm::Value *call_function (const char *name, llvm::Value *arg0,
536                                 llvm::Value *arg1, llvm::Value *arg2) {
537         return call_function (name, { arg0, arg1, arg2 });
538     }
call_function(const char * name,llvm::Value * arg0,llvm::Value * arg1,llvm::Value * arg2,llvm::Value * arg3)539     llvm::Value *call_function (const char *name, llvm::Value *arg0,
540                                 llvm::Value *arg1, llvm::Value *arg2,
541                                 llvm::Value *arg3) {
542         return call_function (name, { arg0, arg1, arg2, arg3 });
543     }
544 
545     /// Mark the function call (which MUST be the value returned by a
546     /// call_function()) as using the 'fast' calling convention.
547     void mark_fast_func_call (llvm::Value *funccall);
548 
549     /// Set the code insertion point for subsequent ops to block.
550     void set_insert_point (llvm::BasicBlock *block);
551 
552     /// Return op from a void function.  If retval is NULL, we are returning
553     /// from a void function.
554     void op_return (llvm::Value *retval=NULL);
555 
556     /// Create a branch instruction to block and establish that as the as
557     /// the new code insertion point.
558     void op_branch (llvm::BasicBlock *block);
559 
560     /// Create a conditional branch instruction to trueblock if cond is
561     /// true, to falseblock if cond is false, and establish trueblock as the
562     /// new insertion point).
563     void op_branch (llvm::Value *cond, llvm::BasicBlock *trueblock,
564                     llvm::BasicBlock *falseblock);
565 
566     /// Generate code for a memset.
567     void op_memset (llvm::Value *ptr, int val, int len, int align=1);
568 
569     /// Generate code for variable size memset
570     void op_memset (llvm::Value *ptr, int val, llvm::Value *len, int align=1);
571 
572     /// Generate code for a memcpy.
573     void op_memcpy (llvm::Value *dst, llvm::Value *src, int len, int align=1);
574 
575     /// Generate code for a memcpy.
576     void op_memcpy (llvm::Value *dst, int dstalign,
577                     llvm::Value *src, int srcalign, int len);
578 
579     /// Dereference a pointer:  return *ptr
580     llvm::Value *op_load (llvm::Value *ptr);
581 
582     /// Store to a dereferenced pointer:   *ptr = val
583     void op_store (llvm::Value *val, llvm::Value *ptr);
584 
585     // N.B. "GEP" -- GetElementPointer -- is a particular LLVM-ism that is
586     // the means for retrieving elements from some kind of aggregate: the
587     // i-th field in a struct, the i-th element of an array.  They can be
588     // chained together, to get at items in a recursive hierarchy.
589 
590     /// Generate a GEP (get element pointer) where the element index is an
591     /// llvm::Value, which can be generated from either a constant or a
592     /// runtime-computed integer element index.
593     llvm::Value *GEP (llvm::Value *ptr, llvm::Value *elem);
594 
595     /// Generate a GEP (get element pointer) with an integer element
596     /// offset.
597     llvm::Value *GEP (llvm::Value *ptr, int elem);
598 
599     /// Generate a GEP (get element pointer) with two integer element
600     /// offsets.  This is just a special (and common) case of GEP where
601     /// we have a 2-level hierarchy and we have fixed element indices
602     /// that are known at compile time.
603     llvm::Value *GEP (llvm::Value *ptr, int elem1, int elem2);
604 
605     // Arithmetic ops.  It auto-detects the type (int vs float).
606     // ...
607     llvm::Value *op_add (llvm::Value *a, llvm::Value *b);
608     llvm::Value *op_sub (llvm::Value *a, llvm::Value *b);
609     llvm::Value *op_neg (llvm::Value *a);
610     llvm::Value *op_mul (llvm::Value *a, llvm::Value *b);
611     llvm::Value *op_div (llvm::Value *a, llvm::Value *b);
612     llvm::Value *op_mod (llvm::Value *a, llvm::Value *b);
613     llvm::Value *op_float_to_int (llvm::Value *a);
614     llvm::Value *op_int_to_float (llvm::Value *a);
615     llvm::Value *op_bool_to_int (llvm::Value *a);
616     llvm::Value *op_bool_to_float (llvm::Value *a);
617     llvm::Value *op_int_to_bool (llvm::Value *a);
618     llvm::Value *op_float_to_double (llvm::Value *a);
619     llvm::Value *op_int_to_longlong (llvm::Value *a);
620 
621     llvm::Value *op_and (llvm::Value *a, llvm::Value *b);
622     llvm::Value *op_or (llvm::Value *a, llvm::Value *b);
623     llvm::Value *op_xor (llvm::Value *a, llvm::Value *b);
624     llvm::Value *op_shl (llvm::Value *a, llvm::Value *b);
625     llvm::Value *op_shr (llvm::Value *a, llvm::Value *b);
626     llvm::Value *op_not (llvm::Value *a);
627 
628     /// Generate IR for (cond ? a : b).  Cond should be a bool.
629     llvm::Value *op_select (llvm::Value *cond, llvm::Value *a, llvm::Value *b);
630 
631     /// Extracts a scalar value from a vector type
632     llvm::Value *op_extract(llvm::Value *a, int index);
633     llvm::Value *op_extract(llvm::Value *a, llvm::Value * index);
634     llvm::Value *op_insert(llvm::Value *v, llvm::Value *a, int index);
635 
636     // Comparison ops.  Each auto-detects the operand type (int vs float).
637     // `ordered` only applies to float comparisons: an ordered comparison
638     // succeeds only if neither argument is NaN.
639     // `ordered` defaults to false for all six ops (see the signatures below).
640     llvm::Value *op_eq (llvm::Value *a, llvm::Value *b, bool ordered=false);  // presumably a == b
641     llvm::Value *op_ne (llvm::Value *a, llvm::Value *b, bool ordered=false);  // presumably a != b
642     llvm::Value *op_gt (llvm::Value *a, llvm::Value *b, bool ordered=false);  // presumably a >  b
643     llvm::Value *op_lt (llvm::Value *a, llvm::Value *b, bool ordered=false);  // presumably a <  b
644     llvm::Value *op_ge (llvm::Value *a, llvm::Value *b, bool ordered=false);  // presumably a >= b
645     llvm::Value *op_le (llvm::Value *a, llvm::Value *b, bool ordered=false);  // presumably a <= b
646 
647     llvm::Value *op_fabs(llvm::Value *v);  // presumably the floating-point absolute value of v -- confirm in the .cpp
648     llvm::Value *op_is_not_finite(llvm::Value *v);  // presumably a test for v being Inf or NaN -- confirm in the .cpp
649 
650     /// Write the module's bitcode (after compilation/optimization) to the
651     /// file named by filename.  If err is not null, errors will be deposited there.
652     void write_bitcode_file (const char *filename, std::string *err=nullptr);
653 
654     /// Generate PTX for the current Module.  NOTE(review): the signature returns bool and takes a std::string& out, so the PTX is presumably delivered through `out` with the bool reporting success -- confirm.
655     bool ptx_compile_group (llvm::Module* lib_module, const std::string& name,  // lib_module and name are not documented in this header
656                             std::string& out);  // non-const reference: an output parameter
657 
658     /// Convert a whole module's bitcode to a string (returned by value).
659     std::string bitcode_string (llvm::Module *module);
660 
661     /// Convert one function's bitcode to a string (returned by value).
662     std::string bitcode_string (llvm::Function *func);
663 
664     /// Delete the IR for the body of the given function in order to reclaim
665     /// its memory (only helpful if we know we won't use the function again).
666     void delete_func_body (llvm::Function *func);
667 
668     /// Reports whether the function is empty apart from a lone ret statement.
669     bool func_is_empty (llvm::Function *func);
670 
671     std::string func_name (llvm::Function *f);  // returns a std::string for f; presumably the function's LLVM name -- confirm in the .cpp
672 
673     static size_t total_jit_memory_held ();  // static (no instance needed); NOTE(review): presumably a total across all instances, units not visible here -- confirm
674 
675 private:
676     class MemoryManager;  // forward declaration of a private nested class; its definition is not in this view
677     class IRBuilder;  // forward declaration of a private nested class; used below via m_builder and builder()
678 
679     void SetupLLVM ();  // private helper; presumably performs LLVM initialization -- confirm in the .cpp
680     IRBuilder& builder();  // private accessor returning an IRBuilder reference; NOTE(review): relation to m_builder (e.g. lazy creation) is not visible here
681 
682     int m_debug;  // no in-class initializer; presumably set from the constructor's `debuglevel` argument -- confirm
683     bool m_dumpasm = false;  // option flag, off by default
684     bool m_jit_fma = false;  // option flag, off by default
685     bool m_jit_aggressive = false;  // option flag, off by default
686     PerThreadInfo::Impl *m_thread;  // NOTE(review): the raw pointers from here to m_llvm_exec have no in-class initializer; initialization and ownership live outside this header -- confirm
687     llvm::LLVMContext *m_llvm_context;
688     llvm::Module *m_llvm_module;
689     IRBuilder *m_builder;  // pointer to the private nested IRBuilder type forward-declared in this class
690     llvm::SectionMemoryManager *m_llvm_jitmm;
691     llvm::Function *m_current_function;
692     llvm::legacy::PassManager *m_llvm_module_passes;
693     llvm::legacy::FunctionPassManager *m_llvm_func_passes;
694     llvm::ExecutionEngine *m_llvm_exec;
695     TargetISA m_target_isa = TargetISA::UNKNOWN;  // starts as UNKNOWN until a target is chosen elsewhere
696     std::vector<llvm::BasicBlock *> m_return_block;     // stack for function calls
697     std::vector<llvm::BasicBlock *> m_loop_after_block; // stack for `break`
698     std::vector<llvm::BasicBlock *> m_loop_step_block;  // stack for `continue`
699 
700     llvm::Type *m_llvm_type_float;  // NOTE(review): this group holds llvm::Type handles, presumably cached once per instance; none has an in-class initializer -- confirm where they are set
701     llvm::Type *m_llvm_type_double;
702     llvm::Type *m_llvm_type_int;
703     llvm::Type *m_llvm_type_int8;
704     llvm::Type *m_llvm_type_int16;
705     llvm::Type *m_llvm_type_addrint;  // presumably an integer type wide enough for an address -- confirm
706     llvm::Type *m_llvm_type_bool;
707     llvm::Type *m_llvm_type_char;
708     llvm::Type *m_llvm_type_longlong;
709     llvm::Type *m_llvm_type_void;
710     llvm::Type *m_llvm_type_triple;  // aggregate type; layout is not visible in this header
711     llvm::Type *m_llvm_type_matrix;  // aggregate type; layout is not visible in this header
712     llvm::PointerType *m_llvm_type_void_ptr;  // the remaining members of this group are llvm::PointerType handles
713     llvm::PointerType *m_llvm_type_ustring_ptr;
714     llvm::PointerType *m_llvm_type_char_ptr;
715     llvm::PointerType *m_llvm_type_bool_ptr;
716     llvm::PointerType *m_llvm_type_int_ptr;
717     llvm::PointerType *m_llvm_type_float_ptr;
718     llvm::PointerType *m_llvm_type_longlong_ptr;
719     llvm::PointerType *m_llvm_type_triple_ptr;
720     llvm::PointerType *m_llvm_type_matrix_ptr;
721     llvm::PointerType *m_llvm_type_double_ptr;
722 
723     int m_vector_width;  // no in-class initializer; presumably set from the constructor's `vector_width` argument -- confirm
724     llvm::Type * m_llvm_type_wide_float;  // NOTE(review): the "wide" handles are presumably vector types m_vector_width lanes across -- confirm in the .cpp
725     llvm::Type * m_llvm_type_wide_double;
726     llvm::Type * m_llvm_type_wide_int;
727     llvm::Type * m_llvm_type_wide_bool;
728     llvm::Type * m_llvm_type_wide_char;
729     llvm::Type * m_llvm_type_wide_longlong;
730     llvm::Type * m_llvm_type_wide_triple;
731     llvm::Type * m_llvm_type_wide_matrix;
732     llvm::Type * m_llvm_type_wide_void_ptr;  // declared as llvm::Type*, not llvm::PointerType*, unlike the *_ptr members further down
733     llvm::Type * m_llvm_type_wide_ustring_ptr;  // likewise declared as llvm::Type*
734     llvm::PointerType * m_llvm_type_wide_char_ptr;
735     llvm::PointerType * m_llvm_type_wide_int_ptr;
736     llvm::PointerType * m_llvm_type_wide_bool_ptr;
737     llvm::PointerType * m_llvm_type_wide_float_ptr;
738     llvm::Type * m_llvm_type_native_mask;  // NOTE(review): representation of the mask type is not visible here -- confirm
739 
740     bool m_supports_masked_stores = false;  // capability flags: all default to false; NOTE(review): presumably filled in once the target ISA is known -- confirm
741     bool m_supports_llvm_bit_masks_natively = false;
742     bool m_supports_avx512f = false;
743     bool m_supports_avx2 = false;
744     bool m_supports_avx = false;
745 
746     // Profiling info
747     llvm::JITEventListener* mVTuneNotifier;  // raw pointer with no in-class initializer; NOTE(review): creation and ownership are not visible in this header
748 
749     // Debug info: private helpers.  NOTE(review): behaviour below is inferred from names and signatures -- confirm in the .cpp.
750     llvm::DIFile * getOrCreateDebugFileFor(const std::string &file_name);  // presumably looks up file_name in mDebugFileByName, creating an entry on a miss
751     llvm::DIScope * getCurrentDebugScope() const;  // const; presumably the innermost entry of mLexicalBlocks
752     llvm::DILocation *getCurrentInliningSite() const;  // const; presumably the innermost entry of mInliningSites
753 
754     llvm::DIBuilder* m_llvm_debug_builder;  // raw pointer, no in-class initializer; ownership not visible in this header
755     llvm::DICompileUnit *mDebugCU;  // raw pointer, no in-class initializer
756     std::vector<llvm::DIScope *> mLexicalBlocks;  // sequence of debug scopes; presumably used as a stack -- confirm
757 
758     using FileByNameType = std::unordered_map<std::string, llvm::DIFile *>;  // alias: file name -> llvm::DIFile*
759     FileByNameType mDebugFileByName;  // the map instance keyed by file name
760     std::vector<llvm::DILocation *> mInliningSites;  // sequence of debug locations; presumably used as a stack of active inlining sites -- confirm
761     llvm::DISubroutineType * mSubTypeForInlinedFunction;  // raw pointer, no in-class initializer
762     bool m_ModuleIsFinalized;  // NOTE(review): unlike the m_supports_* flags, no in-class initializer -- confirm the constructor sets it
763     bool m_ModuleIsPruned;  // NOTE(review): no in-class initializer -- confirm the constructor sets it
764 };
765 
766 
767 
768 }; // namespace pvt
769 OSL_NAMESPACE_EXIT
770