1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18  * USE OR OTHER DEALINGS IN THE SOFTWARE.
19  *
20  * The above copyright notice and this permission notice (including the
21  * next paragraph) shall be included in all copies or substantial portions
22  * of the Software.
23  *
24  */
25 
#include <llvm-c/Core.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/Transforms/IPO.h>

#include <cstring>
#include <new>
34 
35 /* DO NOT REORDER THE HEADERS
36  * The LLVM headers need to all be included before any Mesa header,
37  * as they use the `restrict` keyword in ways that are incompatible
38  * with our #define in include/c99_compat.h
39  */
40 
41 #include "ac_binary.h"
42 #include "ac_llvm_util.h"
43 #include "ac_llvm_build.h"
44 #include "util/macros.h"
45 
ac_add_attr_dereferenceable(LLVMValueRef val,uint64_t bytes)46 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
47 {
48    llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
49    A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
50 }
51 
ac_add_attr_alignment(LLVMValueRef val,uint64_t bytes)52 void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
53 {
54    llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
55    A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
56 }
57 
ac_is_sgpr_param(LLVMValueRef arg)58 bool ac_is_sgpr_param(LLVMValueRef arg)
59 {
60    llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
61    llvm::AttributeList AS = A->getParent()->getAttributes();
62    unsigned ArgNo = A->getArgNo();
63    return AS.hasParamAttr(ArgNo, llvm::Attribute::InReg);
64 }
65 
ac_llvm_get_called_value(LLVMValueRef call)66 LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
67 {
68    return LLVMGetCalledValue(call);
69 }
70 
ac_llvm_is_function(LLVMValueRef v)71 bool ac_llvm_is_function(LLVMValueRef v)
72 {
73    return LLVMGetValueKind(v) == LLVMFunctionValueKind;
74 }
75 
ac_create_module(LLVMTargetMachineRef tm,LLVMContextRef ctx)76 LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
77 {
78    llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
79    LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
80 
81    llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
82    llvm::unwrap(module)->setDataLayout(TM->createDataLayout());
83    return module;
84 }
85 
ac_create_builder(LLVMContextRef ctx,enum ac_float_mode float_mode)86 LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
87 {
88    LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
89 
90    llvm::FastMathFlags flags;
91 
92    switch (float_mode) {
93    case AC_FLOAT_MODE_DEFAULT:
94    case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
95       break;
96 
97    case AC_FLOAT_MODE_DEFAULT_OPENGL:
98       /* Allow optimizations to treat the sign of a zero argument or
99        * result as insignificant.
100        */
101       flags.setNoSignedZeros(); /* nsz */
102 
103       /* Allow optimizations to use the reciprocal of an argument
104        * rather than perform division.
105        */
106       flags.setAllowReciprocal(); /* arcp */
107 
108       llvm::unwrap(builder)->setFastMathFlags(flags);
109       break;
110    }
111 
112    return builder;
113 }
114 
ac_enable_signed_zeros(struct ac_llvm_context * ctx)115 void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
116 {
117    if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
118       auto *b = llvm::unwrap(ctx->builder);
119       llvm::FastMathFlags flags = b->getFastMathFlags();
120 
121       /* This disables the optimization of (x + 0), which is used
122        * to convert negative zero to positive zero.
123        */
124       flags.setNoSignedZeros(false);
125       b->setFastMathFlags(flags);
126    }
127 }
128 
ac_disable_signed_zeros(struct ac_llvm_context * ctx)129 void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
130 {
131    if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
132       auto *b = llvm::unwrap(ctx->builder);
133       llvm::FastMathFlags flags = b->getFastMathFlags();
134 
135       flags.setNoSignedZeros();
136       b->setFastMathFlags(flags);
137    }
138 }
139 
ac_create_target_library_info(const char * triple)140 LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple)
141 {
142    return reinterpret_cast<LLVMTargetLibraryInfoRef>(
143       new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
144 }
145 
ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)146 void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
147 {
148    delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
149 }
150 
151 /* Implementation of raw_pwrite_stream that works on malloc()ed memory for
152  * better compatibility with C code. */
153 struct raw_memory_ostream : public llvm::raw_pwrite_stream {
154    char *buffer;
155    size_t written;
156    size_t bufsize;
157 
raw_memory_ostreamraw_memory_ostream158    raw_memory_ostream()
159    {
160       buffer = NULL;
161       written = 0;
162       bufsize = 0;
163       SetUnbuffered();
164    }
165 
~raw_memory_ostreamraw_memory_ostream166    ~raw_memory_ostream()
167    {
168       free(buffer);
169    }
170 
clearraw_memory_ostream171    void clear()
172    {
173       written = 0;
174    }
175 
takeraw_memory_ostream176    void take(char *&out_buffer, size_t &out_size)
177    {
178       out_buffer = buffer;
179       out_size = written;
180       buffer = NULL;
181       written = 0;
182       bufsize = 0;
183    }
184 
185    void flush() = delete;
186 
write_implraw_memory_ostream187    void write_impl(const char *ptr, size_t size) override
188    {
189       if (unlikely(written + size < written))
190          abort();
191       if (written + size > bufsize) {
192          bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
193          buffer = (char *)realloc(buffer, bufsize);
194          if (!buffer) {
195             fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
196             abort();
197          }
198       }
199       memcpy(buffer + written, ptr, size);
200       written += size;
201    }
202 
pwrite_implraw_memory_ostream203    void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
204    {
205       assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
206       memcpy(buffer + offset, ptr, size);
207    }
208 
current_posraw_memory_ostream209    uint64_t current_pos() const override
210    {
211       return written;
212    }
213 };
214 
/* The LLVM compiler is represented as a pass manager containing passes for
 * optimizations, instruction selection, and code generation.
 *
 * Keep the member order: C++ destroys members in reverse declaration order,
 * so passmgr is destroyed before ostream. ac_create_llvm_passes() hands
 * ostream to addPassesToEmitFile() together with passmgr, so the passes
 * presumably keep a reference to the stream - the stream must outlive them.
 */
struct ac_compiler_passes {
   raw_memory_ostream ostream;        /* ELF shader binary stream */
   llvm::legacy::PassManager passmgr; /* list of passes */
};
222 
ac_create_llvm_passes(LLVMTargetMachineRef tm)223 struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
224 {
225    struct ac_compiler_passes *p = new ac_compiler_passes();
226    if (!p)
227       return NULL;
228 
229    llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
230 
231    if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr,
232                                llvm::CGFT_ObjectFile)) {
233       fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
234       delete p;
235       return NULL;
236    }
237    return p;
238 }
239 
/* Delete a pass container created by ac_create_llvm_passes(), including its
 * pass manager and output stream. Passing NULL is a no-op because `delete`
 * on a null pointer does nothing.
 */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
   delete p;
}
244 
245 /* This returns false on failure. */
ac_compile_module_to_elf(struct ac_compiler_passes * p,LLVMModuleRef module,char ** pelf_buffer,size_t * pelf_size)246 bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
247                               char **pelf_buffer, size_t *pelf_size)
248 {
249    p->passmgr.run(*llvm::unwrap(module));
250    p->ostream.take(*pelf_buffer, *pelf_size);
251    return true;
252 }
253 
ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)254 void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
255 {
256    llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
257 }
258 
ac_enable_global_isel(LLVMTargetMachineRef tm)259 void ac_enable_global_isel(LLVMTargetMachineRef tm)
260 {
261    reinterpret_cast<llvm::TargetMachine *>(tm)->setGlobalISel(true);
262 }
263 
ac_build_atomic_rmw(struct ac_llvm_context * ctx,LLVMAtomicRMWBinOp op,LLVMValueRef ptr,LLVMValueRef val,const char * sync_scope)264 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
265                                  LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
266 {
267    llvm::AtomicRMWInst::BinOp binop;
268    switch (op) {
269    case LLVMAtomicRMWBinOpXchg:
270       binop = llvm::AtomicRMWInst::Xchg;
271       break;
272    case LLVMAtomicRMWBinOpAdd:
273       binop = llvm::AtomicRMWInst::Add;
274       break;
275    case LLVMAtomicRMWBinOpSub:
276       binop = llvm::AtomicRMWInst::Sub;
277       break;
278    case LLVMAtomicRMWBinOpAnd:
279       binop = llvm::AtomicRMWInst::And;
280       break;
281    case LLVMAtomicRMWBinOpNand:
282       binop = llvm::AtomicRMWInst::Nand;
283       break;
284    case LLVMAtomicRMWBinOpOr:
285       binop = llvm::AtomicRMWInst::Or;
286       break;
287    case LLVMAtomicRMWBinOpXor:
288       binop = llvm::AtomicRMWInst::Xor;
289       break;
290    case LLVMAtomicRMWBinOpMax:
291       binop = llvm::AtomicRMWInst::Max;
292       break;
293    case LLVMAtomicRMWBinOpMin:
294       binop = llvm::AtomicRMWInst::Min;
295       break;
296    case LLVMAtomicRMWBinOpUMax:
297       binop = llvm::AtomicRMWInst::UMax;
298       break;
299    case LLVMAtomicRMWBinOpUMin:
300       binop = llvm::AtomicRMWInst::UMin;
301       break;
302    case LLVMAtomicRMWBinOpFAdd:
303       binop = llvm::AtomicRMWInst::FAdd;
304       break;
305    default:
306       unreachable("invalid LLVMAtomicRMWBinOp");
307       break;
308    }
309    unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
310    return llvm::wrap(llvm::unwrap(ctx->builder)
311                         ->CreateAtomicRMW(binop, llvm::unwrap(ptr), llvm::unwrap(val),
312 #if LLVM_VERSION_MAJOR >= 13
313                                           llvm::MaybeAlign(0),
314 #endif
315                                           llvm::AtomicOrdering::SequentiallyConsistent, SSID));
316 }
317 
ac_build_atomic_cmp_xchg(struct ac_llvm_context * ctx,LLVMValueRef ptr,LLVMValueRef cmp,LLVMValueRef val,const char * sync_scope)318 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
319                                       LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
320 {
321    unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
322    return llvm::wrap(llvm::unwrap(ctx->builder)
323                         ->CreateAtomicCmpXchg(llvm::unwrap(ptr), llvm::unwrap(cmp),
324                                               llvm::unwrap(val),
325 #if LLVM_VERSION_MAJOR >= 13
326                                               llvm::MaybeAlign(0),
327 #endif
328                                               llvm::AtomicOrdering::SequentiallyConsistent,
329                                               llvm::AtomicOrdering::SequentiallyConsistent, SSID));
330 }
331