// Copyright 2020 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "LLVMReactor.hpp"

#include "Debug.hpp"
#include "ExecutableMemory.hpp"
#include "Routine.hpp"

// TODO(b/143539525): Eliminate when warning has been fixed.
#ifdef _MSC_VER
__pragma(warning(push))
    __pragma(warning(disable : 4146))  // unary minus operator applied to unsigned type, result still unsigned
#endif

#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"

#ifdef _MSC_VER
__pragma(warning(pop))
#endif

#if defined(_WIN64)
extern "C" void __chkstk();
#elif defined(_WIN32)
extern "C" void _chkstk();
#endif

#ifdef __ARM_EABI__
extern "C" signed __aeabi_idivmod();
#endif
#if __has_feature(memory_sanitizer)
#	include "sanitizer/msan_interface.h"  // TODO(b/155148722): Remove when we no longer unpoison all writes.

#	include <dlfcn.h>  // dlsym()

// MemorySanitizer uses thread-local storage (TLS) data arrays for passing around
// the 'shadow' values of function arguments and return values. The LLVM JIT can't
// access TLS directly, but it calls __emutls_get_address() to obtain the address.
// Typically, it would be passed a pointer to an __emutls_control structure with a
// name starting with "__emutls_v." that represents the TLS. Both the address of
// __emutls_get_address and the __emutls_v. structures are provided to the JIT by
// the symbol resolver, which can be overridden.
// We take advantage of this by substituting __emutls_get_address() with our own
// implementation, namely rr::getTLSAddress(), and substituting the __emutls_v.
// variables with rr::MSanTLS enums. getTLSAddress() can then provide the address
// of the real TLS variable corresponding to the enum, in statically compiled C++.
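//
// Illustrative sketch (not part of the build): MSan-instrumented JIT code
// effectively executes
//     void *shadow = __emutls_get_address(&__emutls_v.__msan_param_tls);
// and with the substitutions below this becomes
//     void *shadow = rr::getTLSAddress(reinterpret_cast<void *>(rr::MSanTLS::param));
// which returns &__msan_param_tls from this statically compiled translation unit.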

// Forward declare the real TLS variables used by MemorySanitizer. These are
// defined in llvm-project/compiler-rt/lib/msan/msan.cpp.
extern __thread unsigned long long __msan_param_tls[];
extern __thread unsigned long long __msan_retval_tls[];
extern __thread unsigned long long __msan_va_arg_tls[];
extern __thread unsigned long long __msan_va_arg_overflow_size_tls;

namespace rr {

enum class MSanTLS
{
	param = 1,            // __msan_param_tls
	retval,               // __msan_retval_tls
	va_arg,               // __msan_va_arg_tls
	va_arg_overflow_size  // __msan_va_arg_overflow_size_tls
};

static void *getTLSAddress(void *control)
{
	auto tlsIndex = static_cast<MSanTLS>(reinterpret_cast<uintptr_t>(control));
	switch(tlsIndex)
	{
		case MSanTLS::param: return reinterpret_cast<void *>(&__msan_param_tls);
		case MSanTLS::retval: return reinterpret_cast<void *>(&__msan_retval_tls);
		case MSanTLS::va_arg: return reinterpret_cast<void *>(&__msan_va_arg_tls);
		case MSanTLS::va_arg_overflow_size: return reinterpret_cast<void *>(&__msan_va_arg_overflow_size_tls);
		default:
			UNSUPPORTED("MemorySanitizer used an unrecognized TLS variable: %d", int(tlsIndex));
			return nullptr;
	}
}

}  // namespace rr
#endif

namespace {

// JITGlobals is a singleton that holds all the immutable, machine-specific
// information for the host device.
class JITGlobals
{
public:
	static JITGlobals *get();

	llvm::orc::JITTargetMachineBuilder getTargetMachineBuilder(rr::Optimization::Level optLevel) const;
	const llvm::DataLayout &getDataLayout() const;
	const llvm::Triple &getTargetTriple() const;

private:
	JITGlobals(llvm::orc::JITTargetMachineBuilder &&jitTargetMachineBuilder, llvm::DataLayout &&dataLayout);

	static llvm::CodeGenOpt::Level toLLVM(rr::Optimization::Level level);

	const llvm::orc::JITTargetMachineBuilder jitTargetMachineBuilder;
	const llvm::DataLayout dataLayout;
};

JITGlobals *JITGlobals::get()
{
	static JITGlobals instance = [] {
		llvm::InitializeNativeTarget();
		llvm::InitializeNativeTargetAsmPrinter();
		llvm::InitializeNativeTargetAsmParser();

		// TODO(b/171236524): JITTargetMachineBuilder::detectHost() currently uses the target triple of the host,
		// rather than a valid triple for the current process. Once fixed, we can use that function instead.
		llvm::orc::JITTargetMachineBuilder jitTargetMachineBuilder(llvm::Triple(LLVM_DEFAULT_TARGET_TRIPLE));

		// Retrieve the host CPU name and sub-target features and add them to the builder.
		// The relocation model, code model and codegen opt level are kept at their default values.
		llvm::StringMap<bool> cpuFeatures;
		bool ok = llvm::sys::getHostCPUFeatures(cpuFeatures);

#if defined(__i386__) || defined(__x86_64__) || \
    (defined(__linux__) && (defined(__arm__) || defined(__aarch64__)))
		ASSERT_MSG(ok, "llvm::sys::getHostCPUFeatures returned false");
#else
		(void)ok;  // getHostCPUFeatures always returns false on other platforms
#endif

		for(auto &feature : cpuFeatures)
		{
			jitTargetMachineBuilder.getFeatures().AddFeature(feature.first(), feature.second);
		}

#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
		jitTargetMachineBuilder.setCPU(std::string(llvm::sys::getHostCPUName()));
#else
		jitTargetMachineBuilder.setCPU(llvm::sys::getHostCPUName());
#endif

		// Reactor's MemorySanitizer support depends on intercepting __emutls_get_address calls.
		ASSERT(!__has_feature(memory_sanitizer) || (jitTargetMachineBuilder.getOptions().ExplicitEmulatedTLS &&
		                                            jitTargetMachineBuilder.getOptions().EmulatedTLS));

		auto dataLayout = jitTargetMachineBuilder.getDefaultDataLayoutForTarget();
		ASSERT_MSG(dataLayout, "JITTargetMachineBuilder::getDefaultDataLayoutForTarget() failed");

		return JITGlobals(std::move(jitTargetMachineBuilder), std::move(dataLayout.get()));
	}();

	return &instance;
}

llvm::orc::JITTargetMachineBuilder JITGlobals::getTargetMachineBuilder(rr::Optimization::Level optLevel) const
{
	llvm::orc::JITTargetMachineBuilder out = jitTargetMachineBuilder;
	out.setCodeGenOptLevel(toLLVM(optLevel));

	return out;
}

const llvm::DataLayout &JITGlobals::getDataLayout() const
{
	return dataLayout;
}

const llvm::Triple &JITGlobals::getTargetTriple() const
{
	return jitTargetMachineBuilder.getTargetTriple();
}

JITGlobals::JITGlobals(llvm::orc::JITTargetMachineBuilder &&jitTargetMachineBuilder, llvm::DataLayout &&dataLayout)
    : jitTargetMachineBuilder(jitTargetMachineBuilder)
    , dataLayout(dataLayout)
{
}

llvm::CodeGenOpt::Level JITGlobals::toLLVM(rr::Optimization::Level level)
{
	switch(level)
	{
		case rr::Optimization::Level::None: return llvm::CodeGenOpt::None;
		case rr::Optimization::Level::Less: return llvm::CodeGenOpt::Less;
		case rr::Optimization::Level::Default: return llvm::CodeGenOpt::Default;
		case rr::Optimization::Level::Aggressive: return llvm::CodeGenOpt::Aggressive;
		default: UNREACHABLE("Unknown Optimization Level %d", int(level));
	}
	return llvm::CodeGenOpt::Default;
}

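// MemoryMapper routes the JIT's section allocations through Reactor's
// ExecutableMemory primitives (rr::allocateMemoryPages and friends), so that
// generated code and data get the same page-permission handling as the rest
// of Reactor.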
class MemoryMapper final : public llvm::SectionMemoryManager::MemoryMapper
{
public:
	MemoryMapper() {}
	~MemoryMapper() final {}

	llvm::sys::MemoryBlock allocateMappedMemory(
	    llvm::SectionMemoryManager::AllocationPurpose purpose,
	    size_t numBytes, const llvm::sys::MemoryBlock *const nearBlock,
	    unsigned flags, std::error_code &errorCode) final
	{
		errorCode = std::error_code();

		// Round up numBytes to the page size.
		size_t pageSize = rr::memoryPageSize();
		numBytes = (numBytes + pageSize - 1) & ~(pageSize - 1);
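		// For example, with 4096-byte pages, numBytes = 5000 becomes
		// (5000 + 4095) & ~4095 == 8192.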

		bool need_exec =
		    purpose == llvm::SectionMemoryManager::AllocationPurpose::Code;
		void *addr = rr::allocateMemoryPages(
		    numBytes, flagsToPermissions(flags), need_exec);
		if(!addr)
			return llvm::sys::MemoryBlock();
		return llvm::sys::MemoryBlock(addr, numBytes);
	}

	std::error_code protectMappedMemory(const llvm::sys::MemoryBlock &block,
	                                    unsigned flags) final
	{
		// Round down the base address to align with a page boundary. This
		// matches DefaultMMapper behavior.
		void *addr = block.base();
		size_t size = block.allocatedSize();
		size_t pageSize = rr::memoryPageSize();
		addr = reinterpret_cast<void *>(
		    reinterpret_cast<uintptr_t>(addr) & ~(pageSize - 1));
		size += reinterpret_cast<uintptr_t>(block.base()) -
		        reinterpret_cast<uintptr_t>(addr);

		rr::protectMemoryPages(addr, size, flagsToPermissions(flags));
		return std::error_code();
	}

	std::error_code releaseMappedMemory(llvm::sys::MemoryBlock &block) final
	{
		size_t size = block.allocatedSize();

		rr::deallocateMemoryPages(block.base(), size);
		return std::error_code();
	}

private:
	int flagsToPermissions(unsigned flags)
	{
		int result = 0;
		if(flags & llvm::sys::Memory::MF_READ)
		{
			result |= rr::PERMISSION_READ;
		}
		if(flags & llvm::sys::Memory::MF_WRITE)
		{
			result |= rr::PERMISSION_WRITE;
		}
		if(flags & llvm::sys::Memory::MF_EXEC)
		{
			result |= rr::PERMISSION_EXECUTE;
		}
		return result;
	}
};

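// Rounds val up to the nearest multiple of alignment,
// e.g. alignUp(13, 8) == 16 and alignUp(16, 8) == 16.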
template<typename T>
T alignUp(T val, T alignment)
{
	return alignment * ((val + alignment - 1) / alignment);
}

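// Allocates size bytes aligned to alignment (which must be less than 256).
// The byte immediately preceding the returned pointer stores the offset back
// to the start of the underlying allocation, which alignedFree() reads to
// recover it:
//     [padding...][offset byte][aligned user data...]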
void *alignedAlloc(size_t size, size_t alignment)
{
	ASSERT(alignment < 256);
	auto allocation = new uint8_t[size + sizeof(uint8_t) + alignment];
	auto aligned = allocation;
	aligned += sizeof(uint8_t);                                                                       // Make space for the base-address offset.
	aligned = reinterpret_cast<uint8_t *>(alignUp(reinterpret_cast<uintptr_t>(aligned), alignment));  // align
	auto offset = static_cast<uint8_t>(aligned - allocation);
	aligned[-1] = offset;
	return aligned;
}

void alignedFree(void *ptr)
{
	auto aligned = reinterpret_cast<uint8_t *>(ptr);
	auto offset = aligned[-1];
	auto allocation = aligned - offset;
	delete[] allocation;
}

template<typename T>
static void atomicLoad(void *ptr, void *ret, llvm::AtomicOrdering ordering)
{
	*reinterpret_cast<T *>(ret) = std::atomic_load_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), rr::atomicOrdering(ordering));
}

template<typename T>
static void atomicStore(void *ptr, void *val, llvm::AtomicOrdering ordering)
{
	std::atomic_store_explicit<T>(reinterpret_cast<std::atomic<T> *>(ptr), *reinterpret_cast<T *>(val), rr::atomicOrdering(ordering));
}

#ifdef __ANDROID__
template<typename F>
static uint32_t sync_fetch_and_op(uint32_t volatile *ptr, uint32_t val, F f)
{
	// Build an arbitrary op out of looped CAS
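	// For example, f = [](uint32_t a, uint32_t b) { return a + b; } turns this
	// into an atomic fetch-and-add that returns the previous value of *ptr.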
	for(;;)
	{
		uint32_t expected = *ptr;
		uint32_t desired = f(expected, val);

		if(expected == __sync_val_compare_and_swap_4(ptr, expected, desired))
		{
			return expected;
		}
	}
}
#endif

#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
class ExternalSymbolGenerator : public llvm::orc::DefinitionGenerator
#else
class ExternalSymbolGenerator : public llvm::orc::JITDylib::DefinitionGenerator
#endif
{
	struct Atomic
	{
		static void load(size_t size, void *ptr, void *ret, llvm::AtomicOrdering ordering)
		{
			switch(size)
			{
				case 1: atomicLoad<uint8_t>(ptr, ret, ordering); break;
				case 2: atomicLoad<uint16_t>(ptr, ret, ordering); break;
				case 4: atomicLoad<uint32_t>(ptr, ret, ordering); break;
				case 8: atomicLoad<uint64_t>(ptr, ret, ordering); break;
				default:
					UNIMPLEMENTED_NO_BUG("Atomic::load(size: %d)", int(size));
			}
		}
		static void store(size_t size, void *ptr, void *val, llvm::AtomicOrdering ordering)
		{
			switch(size)
			{
				case 1: atomicStore<uint8_t>(ptr, val, ordering); break;
				case 2: atomicStore<uint16_t>(ptr, val, ordering); break;
				case 4: atomicStore<uint32_t>(ptr, val, ordering); break;
				case 8: atomicStore<uint64_t>(ptr, val, ordering); break;
				default:
					UNIMPLEMENTED_NO_BUG("Atomic::store(size: %d)", int(size));
			}
		}
	};

	static void nop() {}
	static void neverCalled() { UNREACHABLE("Should never be called"); }

	static void *coroutine_alloc_frame(size_t size) { return alignedAlloc(size, 16); }
	static void coroutine_free_frame(void *ptr) { alignedFree(ptr); }

#ifdef __ANDROID__
	// Forwarders, since we can't take the address of builtins.
	static void sync_synchronize() { __sync_synchronize(); }
	static uint32_t sync_fetch_and_add_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_add_4(ptr, val); }
	static uint32_t sync_fetch_and_and_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_and_4(ptr, val); }
	static uint32_t sync_fetch_and_or_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_or_4(ptr, val); }
	static uint32_t sync_fetch_and_xor_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_xor_4(ptr, val); }
	static uint32_t sync_fetch_and_sub_4(uint32_t *ptr, uint32_t val) { return __sync_fetch_and_sub_4(ptr, val); }
	static uint32_t sync_lock_test_and_set_4(uint32_t *ptr, uint32_t val) { return __sync_lock_test_and_set_4(ptr, val); }
	static uint32_t sync_val_compare_and_swap_4(uint32_t *ptr, uint32_t expected, uint32_t desired) { return __sync_val_compare_and_swap_4(ptr, expected, desired); }

	static uint32_t sync_fetch_and_max_4(uint32_t *ptr, uint32_t val)
	{
		return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::max(a, b); });
	}
	static uint32_t sync_fetch_and_min_4(uint32_t *ptr, uint32_t val)
	{
		return sync_fetch_and_op(ptr, val, [](int32_t a, int32_t b) { return std::min(a, b); });
	}
	static uint32_t sync_fetch_and_umax_4(uint32_t *ptr, uint32_t val)
	{
		return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::max(a, b); });
	}
	static uint32_t sync_fetch_and_umin_4(uint32_t *ptr, uint32_t val)
	{
		return sync_fetch_and_op(ptr, val, [](uint32_t a, uint32_t b) { return std::min(a, b); });
	}
#endif

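	// Resolver maps the names of functions that JIT-compiled code may call to
	// their addresses in the host process. tryToGenerate() consults this table
	// whenever the linker encounters an otherwise undefined symbol.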
	class Resolver
	{
	public:
		using FunctionMap = llvm::StringMap<void *>;

		FunctionMap functions;

		Resolver()
		{
#ifdef ENABLE_RR_PRINT
			functions.try_emplace("rr::DebugPrintf", reinterpret_cast<void *>(rr::DebugPrintf));
#endif
			functions.try_emplace("nop", reinterpret_cast<void *>(nop));
			functions.try_emplace("floorf", reinterpret_cast<void *>(floorf));
			functions.try_emplace("nearbyintf", reinterpret_cast<void *>(nearbyintf));
			functions.try_emplace("truncf", reinterpret_cast<void *>(truncf));
			functions.try_emplace("printf", reinterpret_cast<void *>(printf));
			functions.try_emplace("puts", reinterpret_cast<void *>(puts));
			functions.try_emplace("fmodf", reinterpret_cast<void *>(fmodf));

			functions.try_emplace("sinf", reinterpret_cast<void *>(sinf));
			functions.try_emplace("cosf", reinterpret_cast<void *>(cosf));
			functions.try_emplace("asinf", reinterpret_cast<void *>(asinf));
			functions.try_emplace("acosf", reinterpret_cast<void *>(acosf));
			functions.try_emplace("atanf", reinterpret_cast<void *>(atanf));
			functions.try_emplace("sinhf", reinterpret_cast<void *>(sinhf));
			functions.try_emplace("coshf", reinterpret_cast<void *>(coshf));
			functions.try_emplace("tanhf", reinterpret_cast<void *>(tanhf));
			functions.try_emplace("asinhf", reinterpret_cast<void *>(asinhf));
			functions.try_emplace("acoshf", reinterpret_cast<void *>(acoshf));
			functions.try_emplace("atanhf", reinterpret_cast<void *>(atanhf));
			functions.try_emplace("atan2f", reinterpret_cast<void *>(atan2f));
			functions.try_emplace("powf", reinterpret_cast<void *>(powf));
			functions.try_emplace("expf", reinterpret_cast<void *>(expf));
			functions.try_emplace("logf", reinterpret_cast<void *>(logf));
			functions.try_emplace("exp2f", reinterpret_cast<void *>(exp2f));
			functions.try_emplace("log2f", reinterpret_cast<void *>(log2f));

			functions.try_emplace("sin", reinterpret_cast<void *>(static_cast<double (*)(double)>(sin)));
			functions.try_emplace("cos", reinterpret_cast<void *>(static_cast<double (*)(double)>(cos)));
			functions.try_emplace("asin", reinterpret_cast<void *>(static_cast<double (*)(double)>(asin)));
			functions.try_emplace("acos", reinterpret_cast<void *>(static_cast<double (*)(double)>(acos)));
			functions.try_emplace("atan", reinterpret_cast<void *>(static_cast<double (*)(double)>(atan)));
			functions.try_emplace("sinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(sinh)));
			functions.try_emplace("cosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(cosh)));
			functions.try_emplace("tanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(tanh)));
			functions.try_emplace("asinh", reinterpret_cast<void *>(static_cast<double (*)(double)>(asinh)));
			functions.try_emplace("acosh", reinterpret_cast<void *>(static_cast<double (*)(double)>(acosh)));
			functions.try_emplace("atanh", reinterpret_cast<void *>(static_cast<double (*)(double)>(atanh)));
			functions.try_emplace("atan2", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(atan2)));
			functions.try_emplace("pow", reinterpret_cast<void *>(static_cast<double (*)(double, double)>(pow)));
			functions.try_emplace("exp", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp)));
			functions.try_emplace("log", reinterpret_cast<void *>(static_cast<double (*)(double)>(log)));
			functions.try_emplace("exp2", reinterpret_cast<void *>(static_cast<double (*)(double)>(exp2)));
			functions.try_emplace("log2", reinterpret_cast<void *>(static_cast<double (*)(double)>(log2)));

			functions.try_emplace("atomic_load", reinterpret_cast<void *>(Atomic::load));
			functions.try_emplace("atomic_store", reinterpret_cast<void *>(Atomic::store));

			// FIXME(b/119409619): use an allocator here so we can control all memory allocations
			functions.try_emplace("coroutine_alloc_frame", reinterpret_cast<void *>(coroutine_alloc_frame));
			functions.try_emplace("coroutine_free_frame", reinterpret_cast<void *>(coroutine_free_frame));

			functions.try_emplace("memset", reinterpret_cast<void *>(memset));

#ifdef __APPLE__
			functions.try_emplace("sincosf_stret", reinterpret_cast<void *>(__sincosf_stret));
#elif defined(__linux__)
			functions.try_emplace("sincosf", reinterpret_cast<void *>(sincosf));
#elif defined(_WIN64)
			functions.try_emplace("chkstk", reinterpret_cast<void *>(__chkstk));
#elif defined(_WIN32)
			functions.try_emplace("chkstk", reinterpret_cast<void *>(_chkstk));
#endif

#ifdef __ARM_EABI__
			functions.try_emplace("aeabi_idivmod", reinterpret_cast<void *>(__aeabi_idivmod));
#endif
#ifdef __ANDROID__
			functions.try_emplace("aeabi_unwind_cpp_pr0", reinterpret_cast<void *>(neverCalled));
			functions.try_emplace("sync_synchronize", reinterpret_cast<void *>(sync_synchronize));
			functions.try_emplace("sync_fetch_and_add_4", reinterpret_cast<void *>(sync_fetch_and_add_4));
			functions.try_emplace("sync_fetch_and_and_4", reinterpret_cast<void *>(sync_fetch_and_and_4));
			functions.try_emplace("sync_fetch_and_or_4", reinterpret_cast<void *>(sync_fetch_and_or_4));
			functions.try_emplace("sync_fetch_and_xor_4", reinterpret_cast<void *>(sync_fetch_and_xor_4));
			functions.try_emplace("sync_fetch_and_sub_4", reinterpret_cast<void *>(sync_fetch_and_sub_4));
			functions.try_emplace("sync_lock_test_and_set_4", reinterpret_cast<void *>(sync_lock_test_and_set_4));
			functions.try_emplace("sync_val_compare_and_swap_4", reinterpret_cast<void *>(sync_val_compare_and_swap_4));
			functions.try_emplace("sync_fetch_and_max_4", reinterpret_cast<void *>(sync_fetch_and_max_4));
			functions.try_emplace("sync_fetch_and_min_4", reinterpret_cast<void *>(sync_fetch_and_min_4));
			functions.try_emplace("sync_fetch_and_umax_4", reinterpret_cast<void *>(sync_fetch_and_umax_4));
			functions.try_emplace("sync_fetch_and_umin_4", reinterpret_cast<void *>(sync_fetch_and_umin_4));
#endif
#if __has_feature(memory_sanitizer)
			functions.try_emplace("msan_unpoison", reinterpret_cast<void *>(__msan_unpoison));  // TODO(b/155148722): Remove when we no longer unpoison all writes.

			functions.try_emplace("emutls_get_address", reinterpret_cast<void *>(rr::getTLSAddress));
			functions.try_emplace("emutls_v.__msan_retval_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::retval)));
			functions.try_emplace("emutls_v.__msan_param_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::param)));
			functions.try_emplace("emutls_v.__msan_va_arg_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::va_arg)));
			functions.try_emplace("emutls_v.__msan_va_arg_overflow_size_tls", reinterpret_cast<void *>(static_cast<uintptr_t>(rr::MSanTLS::va_arg_overflow_size)));
#endif
		}
	};

	llvm::Error tryToGenerate(
#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
	    llvm::orc::LookupState &state,
#endif
	    llvm::orc::LookupKind kind,
	    llvm::orc::JITDylib &dylib,
	    llvm::orc::JITDylibLookupFlags flags,
	    const llvm::orc::SymbolLookupSet &set) override
	{
		static Resolver resolver;

		llvm::orc::SymbolMap symbols;

#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
		std::string missing;
#endif  // !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)

		for(auto symbol : set)
		{
			auto name = symbol.first;

			// Trim any underscores from the start of the symbol. LLVM likes
			// to prepend these on macOS.
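			// For example, "_sinf" is looked up as "sinf".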
			auto trimmed = (*name).drop_while([](char c) { return c == '_'; });

			auto it = resolver.functions.find(trimmed.str());
			if(it != resolver.functions.end())
			{
				symbols[name] = llvm::JITEvaluatedSymbol(
				    static_cast<llvm::JITTargetAddress>(reinterpret_cast<uintptr_t>(it->second)),
				    llvm::JITSymbolFlags::Exported);

				continue;
			}

#if __has_feature(memory_sanitizer)
			// MemorySanitizer uses a dynamically linked runtime. Instrumented routines reference
			// some symbols from this library. Look them up dynamically in the default namespace.
			// Note this approach should not be used for other symbols: they might not be
			// visible (e.g. due to static linking), we may wish to provide an alternate
			// implementation, and it could be a security vulnerability.

			void *address = dlsym(RTLD_DEFAULT, (*symbol.first).data());

			if(address)
			{
				symbols[name] = llvm::JITEvaluatedSymbol(
				    static_cast<llvm::JITTargetAddress>(reinterpret_cast<uintptr_t>(address)),
				    llvm::JITSymbolFlags::Exported);

				continue;
			}
#endif

#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
			missing += (missing.empty() ? "'" : ", '") + (*name).str() + "'";
#endif
		}

#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
		// Missing functions will likely make the module fail in non-obvious ways.
		if(!missing.empty())
		{
			WARN("Missing external functions: %s", missing.c_str());
		}
#endif

		if(symbols.empty())
		{
			return llvm::Error::success();
		}

		return dylib.define(llvm::orc::absoluteSymbols(std::move(symbols)));
	}
};

// As we must support different LLVM versions, add a generic Unwrap for functions
// that return either Expected<T> or the actual T.
// TODO(b/165000222): Remove after LLVM 11 upgrade
template<typename T>
auto &Unwrap(llvm::Expected<T> &&v)
{
	return v.get();
}
template<typename T>
auto &Unwrap(T &&v)
{
	return v;
}
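// For example, session.createJITDylib("<routine>") (used below) returns
// llvm::Expected<llvm::orc::JITDylib &> in newer LLVM releases but a plain
// JITDylib & in older ones; Unwrap() yields the JITDylib & in both cases.
// Note the Expected<T> overload assumes success and does not handle the
// error state.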

// JITRoutine is an rr::Routine that holds an LLVM JIT session, compiler and
// object layer, since each routine may require different target machine
// settings and no Reactor routine directly links against another.
class JITRoutine : public rr::Routine
{
	llvm::orc::ExecutionSession session;
	llvm::orc::RTDyldObjectLinkingLayer objectLayer;
	llvm::orc::IRCompileLayer compileLayer;
	llvm::orc::MangleAndInterner mangle;
	llvm::orc::ThreadSafeContext ctx;
	llvm::orc::JITDylib &dylib;
	std::vector<const void *> addresses;

public:
	JITRoutine(
	    std::unique_ptr<llvm::Module> module,
	    llvm::Function **funcs,
	    size_t count,
	    const rr::Config &config)
	    : objectLayer(session, []() {
		    static MemoryMapper memoryMapper;
		    return std::make_unique<llvm::SectionMemoryManager>(&memoryMapper);
	    })
	    , compileLayer(session, objectLayer, std::make_unique<llvm::orc::ConcurrentIRCompiler>(JITGlobals::get()->getTargetMachineBuilder(config.getOptimization().getLevel())))
	    , mangle(session, JITGlobals::get()->getDataLayout())
	    , ctx(std::make_unique<llvm::LLVMContext>())
	    , dylib(Unwrap(session.createJITDylib("<routine>")))
	    , addresses(count)
	{
#ifdef ENABLE_RR_DEBUG_INFO
		// TODO(b/165000222): Update this on the next LLVM roll.
		// https://github.com/llvm/llvm-project/commit/98f2bb4461072347dcca7d2b1b9571b3a6525801
		// introduces RTDyldObjectLinkingLayer::registerJITEventListener().
		// The current API does not appear to have any way to bind the
		// rr::DebugInfo::NotifyFreeingObject event.
		objectLayer.setNotifyLoaded([](llvm::orc::VModuleKey,
		                               const llvm::object::ObjectFile &obj,
		                               const llvm::RuntimeDyld::LoadedObjectInfo &l) {
			static std::atomic<uint64_t> unique_key{ 0 };
			rr::DebugInfo::NotifyObjectEmitted(unique_key++, obj, l);
		});
#endif  // ENABLE_RR_DEBUG_INFO

		if(JITGlobals::get()->getTargetTriple().isOSBinFormatCOFF())
		{
			// Hack to support symbol visibility in COFF.
			// Matches the hack in llvm::orc::LLJIT::createObjectLinkingLayer().
			// See the documentation on these functions for more detail.
			objectLayer.setOverrideObjectFlagsWithResponsibilityFlags(true);
			objectLayer.setAutoClaimResponsibilityForObjectSymbols(true);
		}

		dylib.addGenerator(std::make_unique<ExternalSymbolGenerator>());

		llvm::SmallVector<llvm::orc::SymbolStringPtr, 8> names(count);
		for(size_t i = 0; i < count; i++)
		{
			auto func = funcs[i];
			func->setLinkage(llvm::GlobalValue::ExternalLinkage);
			func->setDoesNotThrow();
			if(!func->hasName())
			{
				func->setName("f" + llvm::Twine(i).str());
			}
			names[i] = mangle(func->getName());
		}

		// Once the module is passed to the compileLayer, the llvm::Functions
		// are freed. Make sure funcs are not referenced after this point.
		funcs = nullptr;

		llvm::cantFail(compileLayer.add(dylib, llvm::orc::ThreadSafeModule(std::move(module), ctx)));

		// Resolve the function addresses.
		for(size_t i = 0; i < count; i++)
		{
			auto symbol = session.lookup({ &dylib }, names[i]);
			ASSERT_MSG(symbol, "Failed to look up address of routine function %d: %s",
			           (int)i, llvm::toString(symbol.takeError()).c_str());
			addresses[i] = reinterpret_cast<void *>(static_cast<intptr_t>(symbol->getAddress()));
		}
	}

	~JITRoutine()
	{
#if LLVM_VERSION_MAJOR >= 11 /* TODO(b/165000222): Unconditional after LLVM 11 upgrade */
		if(auto err = session.endSession())
		{
			session.reportError(std::move(err));
		}
#endif
	}

	const void *getEntry(int index) const override
	{
		return addresses[index];
	}
};

}  // anonymous namespace

namespace rr {

JITBuilder::JITBuilder(const rr::Config &config)
    : config(config)
    , module(new llvm::Module("", context))
    , builder(new llvm::IRBuilder<>(context))
{
	module->setTargetTriple(LLVM_DEFAULT_TARGET_TRIPLE);
	module->setDataLayout(JITGlobals::get()->getDataLayout());
}

void JITBuilder::optimize(const rr::Config &cfg)
{
#ifdef ENABLE_RR_DEBUG_INFO
	if(debugInfo != nullptr)
	{
		return;  // Don't optimize if we're generating debug info.
	}
#endif  // ENABLE_RR_DEBUG_INFO

	llvm::legacy::PassManager passManager;

#if REACTOR_ENABLE_MEMORY_SANITIZER_INSTRUMENTATION
	if(__has_feature(memory_sanitizer))
	{
		passManager.add(llvm::createMemorySanitizerLegacyPassPass());
	}
#endif

	for(auto pass : cfg.getOptimization().getPasses())
	{
		switch(pass)
		{
			case rr::Optimization::Pass::Disabled: break;
			case rr::Optimization::Pass::CFGSimplification: passManager.add(llvm::createCFGSimplificationPass()); break;
			case rr::Optimization::Pass::LICM: passManager.add(llvm::createLICMPass()); break;
			case rr::Optimization::Pass::AggressiveDCE: passManager.add(llvm::createAggressiveDCEPass()); break;
			case rr::Optimization::Pass::GVN: passManager.add(llvm::createGVNPass()); break;
			case rr::Optimization::Pass::InstructionCombining: passManager.add(llvm::createInstructionCombiningPass()); break;
			case rr::Optimization::Pass::Reassociate: passManager.add(llvm::createReassociatePass()); break;
			case rr::Optimization::Pass::DeadStoreElimination: passManager.add(llvm::createDeadStoreEliminationPass()); break;
			case rr::Optimization::Pass::SCCP: passManager.add(llvm::createSCCPPass()); break;
			case rr::Optimization::Pass::ScalarReplAggregates: passManager.add(llvm::createSROAPass()); break;
			case rr::Optimization::Pass::EarlyCSEPass: passManager.add(llvm::createEarlyCSEPass()); break;
			default:
				UNREACHABLE("pass: %d", int(pass));
		}
	}

	passManager.run(*module);
}

std::shared_ptr<rr::Routine> JITBuilder::acquireRoutine(llvm::Function **funcs, size_t count, const rr::Config &cfg)
{
	ASSERT(module);
	return std::make_shared<JITRoutine>(std::move(module), funcs, count, cfg);
}

}  // namespace rr