1 #include "LLVM_Runtime_Linker.h"
2 #include "LLVM_Headers.h"
3
4 namespace Halide {
5
6 using std::string;
7 using std::vector;
8
9 namespace {
10
parse_bitcode_file(llvm::StringRef buf,llvm::LLVMContext * context,const char * id)11 std::unique_ptr<llvm::Module> parse_bitcode_file(llvm::StringRef buf, llvm::LLVMContext *context, const char *id) {
12
13 llvm::MemoryBufferRef bitcode_buffer = llvm::MemoryBufferRef(buf, id);
14
15 auto ret_val = llvm::expectedToErrorOr(
16 llvm::parseBitcodeFile(bitcode_buffer, *context));
17 if (!ret_val) {
18 internal_error << "Could not parse built-in bitcode file " << id
19 << " llvm error is " << ret_val.getError() << "\n";
20 }
21
22 std::unique_ptr<llvm::Module> result(std::move(*ret_val));
23 result->setModuleIdentifier(id);
24
25 return result;
26 }
27
28 } // namespace
29
// Declares the embedded bitcode for runtime module `mod` (a byte array and
// its length, both with C linkage and defined elsewhere) and defines
// get_initmod_<mod>(context), which parses that bitcode into an llvm::Module.
#define DECLARE_INITMOD(mod) \
    extern "C" unsigned char halide_internal_initmod_##mod[]; \
    extern "C" int halide_internal_initmod_##mod##_length; \
    std::unique_ptr<llvm::Module> get_initmod_##mod(llvm::LLVMContext *context) { \
        const char *bitcode_bytes = (const char *)halide_internal_initmod_##mod; \
        llvm::StringRef bitcode(bitcode_bytes, halide_internal_initmod_##mod##_length); \
        return parse_bitcode_file(bitcode, context, #mod); \
    }
38
// Stub counterpart of the real initmod declarations, for runtime modules
// that were not built into this copy of Halide. Defines both the
// (context, bits_64, debug) getter and the _ll getter; each reports a
// user_error and returns a null module.
#define DECLARE_NO_INITMOD(mod) \
    std::unique_ptr<llvm::Module> get_initmod_##mod(llvm::LLVMContext *, bool, bool) { \
        user_error << "Halide was compiled without support for this target\n"; \
        return nullptr; \
    } \
    std::unique_ptr<llvm::Module> get_initmod_##mod##_ll(llvm::LLVMContext *) { \
        user_error << "Halide was compiled without support for this target\n"; \
        return nullptr; \
    }
48
// Declares the four bitcode variants of a C++ runtime module (32/64-bit,
// each with and without debug) and defines
// get_initmod_<mod>(context, bits_64, debug), which dispatches to the
// variant selected by the two flags.
#define DECLARE_CPP_INITMOD(mod) \
    DECLARE_INITMOD(mod##_32_debug) \
    DECLARE_INITMOD(mod##_64_debug) \
    DECLARE_INITMOD(mod##_32) \
    DECLARE_INITMOD(mod##_64) \
    std::unique_ptr<llvm::Module> get_initmod_##mod(llvm::LLVMContext *context, bool bits_64, bool debug) { \
        if (bits_64) { \
            return debug ? get_initmod_##mod##_64_debug(context) \
                         : get_initmod_##mod##_64(context); \
        } \
        return debug ? get_initmod_##mod##_32_debug(context) \
                     : get_initmod_##mod##_32(context); \
    }
69
// Declares a runtime module that exists in a single variant; the generated
// getter is named get_initmod_<mod>_ll(context).
#define DECLARE_LL_INITMOD(mod) DECLARE_INITMOD(mod##_ll)
72
73 // Universal CPP Initmods. Please keep sorted alphabetically.
74 DECLARE_CPP_INITMOD(alignment_128)
75 DECLARE_CPP_INITMOD(alignment_32)
76 DECLARE_CPP_INITMOD(allocation_cache)
77 DECLARE_CPP_INITMOD(alignment_64)
78 DECLARE_CPP_INITMOD(android_clock)
79 DECLARE_CPP_INITMOD(android_host_cpu_count)
80 DECLARE_CPP_INITMOD(android_io)
81 DECLARE_CPP_INITMOD(halide_buffer_t)
82 DECLARE_CPP_INITMOD(cache)
83 DECLARE_CPP_INITMOD(can_use_target)
84 DECLARE_CPP_INITMOD(cuda)
85 DECLARE_CPP_INITMOD(destructors)
86 DECLARE_CPP_INITMOD(device_interface)
87 DECLARE_CPP_INITMOD(errors)
88 DECLARE_CPP_INITMOD(fake_get_symbol)
89 DECLARE_CPP_INITMOD(fake_thread_pool)
90 DECLARE_CPP_INITMOD(float16_t)
91 DECLARE_CPP_INITMOD(fuchsia_clock)
92 DECLARE_CPP_INITMOD(fuchsia_host_cpu_count)
93 DECLARE_CPP_INITMOD(fuchsia_yield)
94 DECLARE_CPP_INITMOD(gpu_device_selection)
95 DECLARE_CPP_INITMOD(hexagon_dma)
96 DECLARE_CPP_INITMOD(hexagon_host)
97 DECLARE_CPP_INITMOD(ios_io)
98 DECLARE_CPP_INITMOD(linux_clock)
99 DECLARE_CPP_INITMOD(linux_host_cpu_count)
100 DECLARE_CPP_INITMOD(linux_yield)
101 DECLARE_CPP_INITMOD(matlab)
102 DECLARE_CPP_INITMOD(metadata)
103 DECLARE_CPP_INITMOD(module_aot_ref_count)
104 DECLARE_CPP_INITMOD(module_jit_ref_count)
105 DECLARE_CPP_INITMOD(msan)
106 DECLARE_CPP_INITMOD(msan_stubs)
107 DECLARE_CPP_INITMOD(opencl)
108 DECLARE_CPP_INITMOD(opengl)
109 DECLARE_CPP_INITMOD(openglcompute)
110 DECLARE_CPP_INITMOD(opengl_egl_context)
111 DECLARE_CPP_INITMOD(opengl_glx_context)
112 DECLARE_CPP_INITMOD(osx_clock)
113 DECLARE_CPP_INITMOD(osx_get_symbol)
114 DECLARE_CPP_INITMOD(osx_host_cpu_count)
115 DECLARE_CPP_INITMOD(osx_opengl_context)
116 DECLARE_CPP_INITMOD(osx_yield)
117 DECLARE_CPP_INITMOD(posix_abort)
118 DECLARE_CPP_INITMOD(posix_allocator)
119 DECLARE_CPP_INITMOD(posix_clock)
120 DECLARE_CPP_INITMOD(posix_error_handler)
121 DECLARE_CPP_INITMOD(posix_get_symbol)
122 DECLARE_CPP_INITMOD(posix_io)
123 DECLARE_CPP_INITMOD(posix_print)
124 DECLARE_CPP_INITMOD(posix_threads)
125 DECLARE_CPP_INITMOD(posix_threads_tsan)
126 DECLARE_CPP_INITMOD(prefetch)
127 DECLARE_CPP_INITMOD(profiler)
128 DECLARE_CPP_INITMOD(profiler_inlined)
129 DECLARE_CPP_INITMOD(pseudostack)
130 DECLARE_CPP_INITMOD(qurt_allocator)
131 DECLARE_CPP_INITMOD(hexagon_cache_allocator)
132 DECLARE_CPP_INITMOD(hexagon_dma_pool)
133 DECLARE_CPP_INITMOD(qurt_hvx)
134 DECLARE_CPP_INITMOD(qurt_hvx_vtcm)
135 DECLARE_CPP_INITMOD(qurt_init_fini)
136 DECLARE_CPP_INITMOD(qurt_threads)
137 DECLARE_CPP_INITMOD(qurt_threads_tsan)
138 DECLARE_CPP_INITMOD(qurt_yield)
139 DECLARE_CPP_INITMOD(runtime_api)
140 DECLARE_CPP_INITMOD(ssp)
141 DECLARE_CPP_INITMOD(to_string)
142 DECLARE_CPP_INITMOD(trace_helper)
143 DECLARE_CPP_INITMOD(tracing)
144 DECLARE_CPP_INITMOD(windows_clock)
145 DECLARE_CPP_INITMOD(windows_cuda)
146 DECLARE_CPP_INITMOD(windows_get_symbol)
147 DECLARE_CPP_INITMOD(windows_abort)
148 DECLARE_CPP_INITMOD(windows_io)
149 DECLARE_CPP_INITMOD(windows_opencl)
150 DECLARE_CPP_INITMOD(windows_profiler)
151 DECLARE_CPP_INITMOD(windows_threads)
152 DECLARE_CPP_INITMOD(windows_threads_tsan)
153 DECLARE_CPP_INITMOD(windows_yield)
154 DECLARE_CPP_INITMOD(write_debug_image)
155
156 // Universal LL Initmods. Please keep sorted alphabetically.
157 DECLARE_LL_INITMOD(posix_math)
158 DECLARE_LL_INITMOD(win32_math)
159 DECLARE_LL_INITMOD(ptx_dev)
160
161 // Various conditional initmods follow (both LL and CPP).
162 #ifdef WITH_METAL
163 DECLARE_CPP_INITMOD(metal)
164 #ifdef WITH_ARM
165 DECLARE_CPP_INITMOD(metal_objc_arm)
166 #else
167 DECLARE_NO_INITMOD(metal_objc_arm)
168 #endif
169 #ifdef WITH_X86
170 DECLARE_CPP_INITMOD(metal_objc_x86)
171 #else
172 DECLARE_NO_INITMOD(metal_objc_x86)
173 #endif
174 #else
175 DECLARE_NO_INITMOD(metal)
176 DECLARE_NO_INITMOD(metal_objc_arm)
177 DECLARE_NO_INITMOD(metal_objc_x86)
178 #endif // WITH_METAL
179
180 #ifdef WITH_ARM
181 DECLARE_LL_INITMOD(arm)
182 DECLARE_LL_INITMOD(arm_no_neon)
183 DECLARE_CPP_INITMOD(arm_cpu_features)
184 #else
185 DECLARE_NO_INITMOD(arm)
186 DECLARE_NO_INITMOD(arm_no_neon)
187 DECLARE_NO_INITMOD(arm_cpu_features)
188 #endif // WITH_ARM
189
190 #ifdef WITH_AARCH64
191 DECLARE_LL_INITMOD(aarch64)
192 DECLARE_CPP_INITMOD(aarch64_cpu_features)
193 #else
194 DECLARE_NO_INITMOD(aarch64)
195 DECLARE_NO_INITMOD(aarch64_cpu_features)
196 #endif // WITH_AARCH64
197
// PTX compute-capability modules. Note that no stubs are declared when
// WITH_NVPTX is not defined.
#if defined(WITH_NVPTX)
DECLARE_LL_INITMOD(ptx_compute_20)
DECLARE_LL_INITMOD(ptx_compute_30)
DECLARE_LL_INITMOD(ptx_compute_35)
#endif  // defined(WITH_NVPTX)
203
204 #ifdef WITH_D3D12
205 DECLARE_CPP_INITMOD(windows_d3d12compute_x86)
206 #else
207 DECLARE_NO_INITMOD(windows_d3d12compute_x86)
208 #endif
209
210 #ifdef WITH_X86
211 DECLARE_LL_INITMOD(x86_avx2)
212 DECLARE_LL_INITMOD(x86_avx)
213 DECLARE_LL_INITMOD(x86)
214 DECLARE_LL_INITMOD(x86_sse41)
215 DECLARE_CPP_INITMOD(x86_cpu_features)
216 #else
217 DECLARE_NO_INITMOD(x86_avx2)
218 DECLARE_NO_INITMOD(x86_avx)
219 DECLARE_NO_INITMOD(x86)
220 DECLARE_NO_INITMOD(x86_sse41)
221 DECLARE_NO_INITMOD(x86_cpu_features)
222 #endif // WITH_X86
223
224 #ifdef WITH_MIPS
225 DECLARE_LL_INITMOD(mips)
226 DECLARE_CPP_INITMOD(mips_cpu_features)
227 #else
228 DECLARE_NO_INITMOD(mips)
229 DECLARE_NO_INITMOD(mips_cpu_features)
230 #endif // WITH_MIPS
231
232 #ifdef WITH_POWERPC
233 DECLARE_LL_INITMOD(powerpc)
234 DECLARE_CPP_INITMOD(powerpc_cpu_features)
235 #else
236 DECLARE_NO_INITMOD(powerpc)
237 DECLARE_NO_INITMOD(powerpc_cpu_features)
238 #endif // WITH_POWERPC
239
240 #ifdef WITH_HEXAGON
241 DECLARE_LL_INITMOD(hvx_64)
242 DECLARE_LL_INITMOD(hvx_128)
243 DECLARE_CPP_INITMOD(hexagon_cpu_features)
244 #else
245 DECLARE_NO_INITMOD(hvx_64)
246 DECLARE_NO_INITMOD(hvx_128)
247 DECLARE_NO_INITMOD(hexagon_cpu_features)
248 #endif // WITH_HEXAGON
249
250 #ifdef WITH_WEBASSEMBLY
251 DECLARE_CPP_INITMOD(wasm_cpu_features)
252 DECLARE_LL_INITMOD(wasm_math)
253 #else
254 DECLARE_NO_INITMOD(wasm_cpu_features)
255 DECLARE_NO_INITMOD(wasm_math)
256 #endif // WITH_WEBASSEMBLY
257
258 #ifdef WITH_RISCV
259 //DECLARE_LL_INITMOD(riscv)
260 DECLARE_CPP_INITMOD(riscv_cpu_features)
261 #else
262 //DECLARE_NO_INITMOD(riscv)
263 DECLARE_NO_INITMOD(riscv_cpu_features)
264 #endif // WITH_RISCV
265
266 namespace {
267
get_data_layout_for_target(Target target)268 llvm::DataLayout get_data_layout_for_target(Target target) {
269 if (target.arch == Target::X86) {
270 if (target.bits == 32) {
271 if (target.os == Target::OSX) {
272 #if LLVM_VERSION >= 100
273 return llvm::DataLayout("e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:128-n8:16:32-S128");
274 #else
275 return llvm::DataLayout("e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128");
276 #endif
277 } else if (target.os == Target::IOS) {
278 #if LLVM_VERSION >= 100
279 return llvm::DataLayout("e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:128-n8:16:32-S128");
280 #else
281 return llvm::DataLayout("e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128");
282 #endif
283 } else if (target.os == Target::Windows && !target.has_feature(Target::JIT)) {
284 #if LLVM_VERSION >= 100
285 return llvm::DataLayout("e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32");
286 #else
287 return llvm::DataLayout("e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32");
288 #endif
289 } else if (target.os == Target::Windows) {
290 #if LLVM_VERSION >= 100
291 return llvm::DataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32");
292 #else
293 return llvm::DataLayout("e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32");
294 #endif
295 } else {
296 // Linux/Android
297 #if LLVM_VERSION >= 100
298 return llvm::DataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128");
299 #else
300 return llvm::DataLayout("e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128");
301 #endif
302 }
303 } else { // 64-bit
304 if (target.os == Target::OSX) {
305 #if LLVM_VERSION >= 100
306 return llvm::DataLayout("e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128");
307 #else
308 return llvm::DataLayout("e-m:o-i64:64-f80:128-n8:16:32:64-S128");
309 #endif
310 } else if (target.os == Target::IOS) {
311 #if LLVM_VERSION >= 100
312 return llvm::DataLayout("e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128");
313 #else
314 return llvm::DataLayout("e-m:o-i64:64-f80:128-n8:16:32:64-S128");
315 #endif
316 } else if (target.os == Target::Windows && !target.has_feature(Target::JIT)) {
317 #if LLVM_VERSION >= 100
318 return llvm::DataLayout("e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128");
319 #else
320 return llvm::DataLayout("e-m:w-i64:64-f80:128-n8:16:32:64-S128");
321 #endif
322 } else if (target.os == Target::Windows) {
323 #if LLVM_VERSION >= 100
324 return llvm::DataLayout("e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128");
325 #else
326 return llvm::DataLayout("e-m:e-i64:64-f80:128-n8:16:32:64-S128");
327 #endif
328 } else {
329 #if LLVM_VERSION >= 100
330 return llvm::DataLayout("e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128");
331 #else
332 return llvm::DataLayout("e-m:e-i64:64-f80:128-n8:16:32:64-S128");
333 #endif
334 }
335 }
336 } else if (target.arch == Target::ARM) {
337 if (target.bits == 32) {
338 if (target.os == Target::IOS) {
339 return llvm::DataLayout("e-m:o-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32");
340 } else {
341 return llvm::DataLayout("e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64");
342 }
343 } else { // 64-bit
344 if (target.os == Target::IOS) {
345 return llvm::DataLayout("e-m:o-i64:64-i128:128-n32:64-S128");
346 } else if (target.os == Target::OSX) {
347 return llvm::DataLayout("e-m:o-i64:64-i128:128-n32:64-S128");
348 } else if (target.os == Target::Windows) {
349 return llvm::DataLayout("e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128");
350 } else {
351 return llvm::DataLayout("e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128");
352 }
353 }
354 } else if (target.arch == Target::MIPS) {
355 if (target.bits == 32) {
356 return llvm::DataLayout("e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64");
357 } else {
358 return llvm::DataLayout("e-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128");
359 }
360 } else if (target.arch == Target::POWERPC) {
361 if (target.bits == 32) {
362 return llvm::DataLayout("e-m:e-i32:32-n32");
363 } else {
364 return llvm::DataLayout("e-m:e-i64:64-n32:64");
365 }
366 } else if (target.arch == Target::Hexagon) {
367 return llvm::DataLayout(
368 "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8"
369 "-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048");
370 } else if (target.arch == Target::WebAssembly) {
371 if (target.bits == 32) {
372 return llvm::DataLayout("e-m:e-p:32:32-i64:64-n32:64-S128");
373 } else {
374 return llvm::DataLayout("e-m:e-p:64:64-i64:64-n32:64-S128");
375 }
376 } else if (target.arch == Target::RISCV) {
377 // TODO: Valdidate this data layout is correct for RISCV. Assumption is it is like MIPS.
378 if (target.bits == 32) {
379 return llvm::DataLayout("e-m:e-p:32:32-i64:64-n32-S128");
380 } else {
381 return llvm::DataLayout("e-m:e-p:64:64-i64:64-i128:128-n64-S128");
382 }
383 } else {
384 internal_error << "Bad target arch: " << target.arch << "\n";
385 return llvm::DataLayout("unreachable");
386 }
387 }
388
389 } // namespace
390
391 namespace Internal {
392
get_triple_for_target(const Target & target)393 llvm::Triple get_triple_for_target(const Target &target) {
394 llvm::Triple triple;
395
396 if (target.arch == Target::X86) {
397 if (target.bits == 32) {
398 triple.setArch(llvm::Triple::x86);
399 } else {
400 user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
401 triple.setArch(llvm::Triple::x86_64);
402 }
403
404 if (target.os == Target::Linux) {
405 triple.setOS(llvm::Triple::Linux);
406 triple.setEnvironment(llvm::Triple::GNU);
407 } else if (target.os == Target::OSX) {
408 triple.setVendor(llvm::Triple::Apple);
409 triple.setOS(llvm::Triple::MacOSX);
410 } else if (target.os == Target::Windows) {
411 triple.setVendor(llvm::Triple::PC);
412 triple.setOS(llvm::Triple::Win32);
413 triple.setEnvironment(llvm::Triple::MSVC);
414 if (target.has_feature(Target::JIT)) {
415 // Use ELF for jitting
416 triple.setObjectFormat(llvm::Triple::ELF);
417 }
418 } else if (target.os == Target::Android) {
419 triple.setOS(llvm::Triple::Linux);
420 triple.setEnvironment(llvm::Triple::Android);
421 } else if (target.os == Target::IOS) {
422 // X86 on iOS for the simulator
423 triple.setVendor(llvm::Triple::Apple);
424 triple.setOS(llvm::Triple::IOS);
425 } else if (target.os == Target::Fuchsia) {
426 triple.setOS(llvm::Triple::Fuchsia);
427 }
428 } else if (target.arch == Target::ARM) {
429 if (target.bits == 32) {
430 if (target.has_feature(Target::ARMv7s)) {
431 triple.setArchName("armv7s");
432 } else {
433 triple.setArch(llvm::Triple::arm);
434 }
435 } else {
436 user_assert(target.bits == 64) << "Target bits must be 32 or 64\n";
437 #ifdef WITH_AARCH64
438 triple.setArch(llvm::Triple::aarch64);
439 #else
440 user_error << "AArch64 llvm target not enabled in this build of Halide\n";
441 #endif
442 }
443
444 if (target.os == Target::Android) {
445 triple.setOS(llvm::Triple::Linux);
446 triple.setEnvironment(llvm::Triple::EABI);
447 } else if (target.os == Target::IOS) {
448 triple.setOS(llvm::Triple::IOS);
449 triple.setVendor(llvm::Triple::Apple);
450 } else if (target.os == Target::Linux) {
451 triple.setOS(llvm::Triple::Linux);
452 triple.setEnvironment(llvm::Triple::GNUEABIHF);
453 } else if (target.os == Target::Windows) {
454 user_assert(target.bits == 64) << "Windows ARM targets must be 64-bit.\n";
455 triple.setVendor(llvm::Triple::PC);
456 triple.setOS(llvm::Triple::Win32);
457 triple.setEnvironment(llvm::Triple::MSVC);
458 if (target.has_feature(Target::JIT)) {
459 // TODO(shoaibkamil): figure out a way to test this.
460 // Currently blocked by https://github.com/halide/Halide/issues/5040
461 user_error << "No JIT support for this OS/CPU combination yet.\n";
462 }
463 } else if (target.os == Target::Fuchsia) {
464 triple.setOS(llvm::Triple::Fuchsia);
465 } else if (target.os == Target::OSX) {
466 triple.setVendor(llvm::Triple::Apple);
467 triple.setOS(llvm::Triple::MacOSX);
468 triple.setArchName("arm64");
469 } else if (target.os == Target::NoOS) {
470 // For bare-metal environments
471
472 } else {
473 user_error << "No arm support for this OS\n";
474 }
475 } else if (target.arch == Target::MIPS) {
476 // Currently MIPS support is only little-endian.
477 if (target.bits == 32) {
478 triple.setArch(llvm::Triple::mipsel);
479 } else {
480 user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
481 triple.setArch(llvm::Triple::mips64el);
482 }
483
484 if (target.os == Target::Android) {
485 triple.setOS(llvm::Triple::Linux);
486 triple.setEnvironment(llvm::Triple::Android);
487 } else {
488 user_error << "No mips support for this OS\n";
489 }
490 } else if (target.arch == Target::POWERPC) {
491 #ifdef WITH_POWERPC
492 // Only ppc*-unknown-linux-gnu are supported for the time being.
493 user_assert(target.os == Target::Linux) << "PowerPC target is Linux-only.\n";
494 triple.setVendor(llvm::Triple::UnknownVendor);
495 triple.setOS(llvm::Triple::Linux);
496 triple.setEnvironment(llvm::Triple::GNU);
497 if (target.bits == 32) {
498 triple.setArch(llvm::Triple::ppc);
499 } else {
500 // Currently POWERPC64 support is only little-endian.
501 user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
502 triple.setArch(llvm::Triple::ppc64le);
503 }
504 #else
505 user_error << "PowerPC llvm target not enabled in this build of Halide\n";
506 #endif
507 } else if (target.arch == Target::Hexagon) {
508 triple.setVendor(llvm::Triple::UnknownVendor);
509 triple.setArch(llvm::Triple::hexagon);
510 triple.setObjectFormat(llvm::Triple::ELF);
511 } else if (target.arch == Target::WebAssembly) {
512 triple.setVendor(llvm::Triple::UnknownVendor);
513 if (target.bits == 32) {
514 triple.setArch(llvm::Triple::wasm32);
515 } else {
516 triple.setArch(llvm::Triple::wasm64);
517 }
518 triple.setObjectFormat(llvm::Triple::Wasm);
519 } else if (target.arch == Target::RISCV) {
520 if (target.bits == 32) {
521 triple.setArch(llvm::Triple::riscv32);
522 } else {
523 user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
524 triple.setArch(llvm::Triple::riscv64);
525 }
526
527 if (target.os == Target::Linux) {
528 triple.setOS(llvm::Triple::Linux);
529 // TODO: Check what options there are here.
530 triple.setEnvironment(llvm::Triple::GNUEABIHF);
531 } else if (target.os == Target::NoOS) {
532 // for baremetal environment
533 } else {
534 user_error << "No RISCV support for this OS\n";
535 }
536 } else {
537 internal_error << "Bad target arch: " << target.arch << "\n";
538 }
539
540 return triple;
541 }
542
543 } // namespace Internal
544
545 namespace {
546
convert_weak_to_linkonce(llvm::GlobalValue & gv)547 void convert_weak_to_linkonce(llvm::GlobalValue &gv) {
548 llvm::GlobalValue::LinkageTypes linkage = gv.getLinkage();
549 if (linkage == llvm::GlobalValue::WeakAnyLinkage) {
550 gv.setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
551 } else if (linkage == llvm::GlobalValue::WeakODRLinkage) {
552 gv.setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
553 } else if (linkage == llvm::GlobalValue::ExternalWeakLinkage) {
554 gv.setLinkage(llvm::GlobalValue::ExternalLinkage);
555 }
556 }
557
558 // Link all modules together and with the result in modules[0], all
559 // other input modules are destroyed. Sets the datalayout and target
560 // triple appropriately for the target.
link_modules(std::vector<std::unique_ptr<llvm::Module>> & modules,Target t,bool allow_stripping_all_weak_functions=false)561 void link_modules(std::vector<std::unique_ptr<llvm::Module>> &modules, Target t,
562 bool allow_stripping_all_weak_functions = false) {
563 llvm::DataLayout data_layout = get_data_layout_for_target(t);
564 llvm::Triple triple = Internal::get_triple_for_target(t);
565
566 // Set the layout and triple on the modules before linking, so
567 // llvm doesn't complain while combining them.
568 for (size_t i = 0; i < modules.size(); i++) {
569 if (t.os == Target::Windows &&
570 !Internal::starts_with(modules[i]->getName().str(), "windows_")) {
571 // When compiling for windows, all wchars are
572 // 16-bit. Generic modules may have it set to 32-bit. Drop
573 // any module flags on the generic modules and use the
574 // more correct ones on the windows-specific modules to
575 // avoid a conflict. This is safe as long as the generic
576 // modules never actually use a wchar.
577 if (auto *module_flags = modules[i]->getModuleFlagsMetadata()) {
578 modules[i]->eraseNamedMetadata(module_flags);
579 }
580 }
581 modules[i]->setDataLayout(data_layout);
582 modules[i]->setTargetTriple(triple.str());
583 }
584
585 // Link them all together
586 for (size_t i = 1; i < modules.size(); i++) {
587 bool failed = llvm::Linker::linkModules(*modules[0],
588 std::move(modules[i]));
589 if (failed) {
590 internal_error << "Failure linking initial modules\n";
591 }
592 }
593
594 // Now re-mark most weak symbols as linkonce. They are only weak to
595 // prevent llvm from stripping them during initial module
596 // assembly. This means they can be stripped later.
597
598 // The symbols that we might want to call as a user even if not
599 // used in the Halide-generated code must remain weak. This is
600 // handled automatically by assuming any symbol starting with
601 // "halide_" that is weak will be retained. There are a few
602 // symbols for which this convention is not followed and these are
603 // in this set.
604 const std::set<string> retain = {"__stack_chk_guard",
605 "__stack_chk_fail"};
606
607 // Enumerate the global variables.
608 for (auto &gv : modules[0]->globals()) {
609 // No variables are part of the public interface (even the ones labelled halide_)
610 convert_weak_to_linkonce(gv);
611 }
612
613 // Enumerate the functions.
614 for (auto &f : *modules[0]) {
615 const std::string f_name = Internal::get_llvm_function_name(f);
616
617 bool is_halide_extern_c_sym = Internal::starts_with(f_name, "halide_");
618 internal_assert(!is_halide_extern_c_sym || f.isWeakForLinker() || f.isDeclaration())
619 << " for function " << f_name << "\n";
620
621 // We never want *any* Function marked as external-weak here;
622 // convert all of those to plain external.
623 if (f.getLinkage() == llvm::GlobalValue::ExternalWeakLinkage) {
624 f.setLinkage(llvm::GlobalValue::ExternalLinkage);
625 } else {
626 const bool can_strip = !is_halide_extern_c_sym && retain.count(f_name) == 0;
627 if (can_strip || allow_stripping_all_weak_functions) {
628 convert_weak_to_linkonce(f);
629 }
630 }
631
632 // Windows requires every symbol that's going to get merged
633 // has a comdat that specifies how. The linkage type alone
634 // isn't enough.
635 if (t.os == Target::Windows && f.isWeakForLinker()) {
636 llvm::Comdat *comdat = modules[0]->getOrInsertComdat(f_name);
637 comdat->setSelectionKind(llvm::Comdat::Any);
638 f.setComdat(comdat);
639 }
640 }
641
642 // Now remove the force-usage global that prevented clang from
643 // dropping functions from the initial module.
644 llvm::GlobalValue *llvm_used = modules[0]->getNamedGlobal("llvm.used");
645 if (llvm_used) {
646 llvm_used->eraseFromParent();
647 }
648
649 // Also drop the dummy runtime api usage. We only needed it so
650 // that the declarations are retained in the module during the
651 // linking procedure above.
652 llvm::GlobalValue *runtime_api =
653 modules[0]->getNamedGlobal("halide_runtime_api_functions");
654 if (runtime_api) {
655 runtime_api->eraseFromParent();
656 }
657 }
658
659 } // namespace
660
661 namespace Internal {
662
663 /** When JIT-compiling on 32-bit windows, we need to rewrite calls
664 * to name-mangled win32 api calls to non-name-mangled versions.
665 */
undo_win32_name_mangling(llvm::Module * m)666 void undo_win32_name_mangling(llvm::Module *m) {
667 llvm::IRBuilder<> builder(m->getContext());
668 // For every function prototype...
669 for (llvm::Module::iterator iter = m->begin(); iter != m->end(); ++iter) {
670 llvm::Function &f = *iter;
671 string n = get_llvm_function_name(f);
672 // if it's a __stdcall call that starts with \01_, then we're making a win32 api call
673 if (f.getCallingConv() == llvm::CallingConv::X86_StdCall &&
674 f.empty() &&
675 n.size() > 2 && n[0] == 1 && n[1] == '_') {
676
677 // Unmangle the name.
678 string unmangled_name = n.substr(2);
679 size_t at = unmangled_name.rfind('@');
680 unmangled_name = unmangled_name.substr(0, at);
681
682 // Extern declare the unmangled version.
683 llvm::Function *unmangled = llvm::Function::Create(f.getFunctionType(), f.getLinkage(), unmangled_name, m);
684 unmangled->setCallingConv(f.getCallingConv());
685
686 // Add a body to the mangled version that calls the unmangled version.
687 llvm::BasicBlock *block = llvm::BasicBlock::Create(m->getContext(), "entry", &f);
688 builder.SetInsertPoint(block);
689
690 vector<llvm::Value *> args;
691 for (auto &arg : f.args()) {
692 args.push_back(&arg);
693 }
694
695 llvm::CallInst *c = builder.CreateCall(unmangled, args);
696 c->setCallingConv(f.getCallingConv());
697
698 if (f.getReturnType()->isVoidTy()) {
699 builder.CreateRetVoid();
700 } else {
701 builder.CreateRet(c);
702 }
703 }
704 }
705 }
706
add_underscore_to_posix_call(llvm::CallInst * call,llvm::Function * fn,llvm::Module * m)707 void add_underscore_to_posix_call(llvm::CallInst *call, llvm::Function *fn, llvm::Module *m) {
708 string new_name = "_" + fn->getName().str();
709 llvm::Function *alt = m->getFunction(new_name);
710 if (!alt) {
711 alt = llvm::Function::Create(fn->getFunctionType(),
712 llvm::GlobalValue::ExternalLinkage,
713 new_name, m);
714 }
715 internal_assert(alt->getName() == new_name);
716 call->setCalledFunction(alt);
717 }
718
719 /** Windows uses _close, _open, _write, etc instead of the posix
720 * names. Defining stubs that redirect causes mis-compilations inside
721 * of mcjit, so we just rewrite uses of these functions to include an
722 * underscore. */
add_underscores_to_posix_calls_on_windows(llvm::Module * m)723 void add_underscores_to_posix_calls_on_windows(llvm::Module *m) {
724 string posix_fns[] = {"vsnprintf", "open", "close", "write", "fileno"};
725
726 string *posix_fns_begin = posix_fns;
727 string *posix_fns_end = posix_fns + sizeof(posix_fns) / sizeof(posix_fns[0]);
728
729 for (auto &fn : *m) {
730 for (auto &basic_block : fn) {
731 for (auto &instruction : basic_block) {
732 if (llvm::CallInst *call = llvm::dyn_cast<llvm::CallInst>(&instruction)) {
733 if (llvm::Function *called_fn = call->getCalledFunction()) {
734 if (std::find(posix_fns_begin, posix_fns_end, called_fn->getName()) != posix_fns_end) {
735 add_underscore_to_posix_call(call, called_fn, m);
736 }
737 }
738 }
739 }
740 }
741 }
742 }
743
link_with_wasm_jit_runtime(llvm::LLVMContext * c,const Target & t,std::unique_ptr<llvm::Module> extra_module)744 std::unique_ptr<llvm::Module> link_with_wasm_jit_runtime(llvm::LLVMContext *c, const Target &t,
745 std::unique_ptr<llvm::Module> extra_module) {
746 bool bits_64 = (t.bits == 64);
747 bool debug = t.has_feature(Target::Debug);
748
749 // We only need to include things that must be linked in as callable entrypoints;
750 // things that are 'alwaysinline' can be included here but are unnecessary.
751 vector<std::unique_ptr<llvm::Module>> modules;
752 modules.push_back(std::move(extra_module));
753 modules.push_back(get_initmod_fake_thread_pool(c, bits_64, debug));
754 modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
755 modules.push_back(get_initmod_halide_buffer_t(c, bits_64, debug));
756 modules.push_back(get_initmod_destructors(c, bits_64, debug));
757 // These two aren't necessary, since they are 100% alwaysinline
758 // modules.push_back(get_initmod_posix_math_ll(c));
759 // modules.push_back(get_initmod_wasm_math_ll(c));
760 modules.push_back(get_initmod_tracing(c, bits_64, debug));
761 modules.push_back(get_initmod_cache(c, bits_64, debug));
762 modules.push_back(get_initmod_to_string(c, bits_64, debug));
763 modules.push_back(get_initmod_alignment_32(c, bits_64, debug));
764 modules.push_back(get_initmod_device_interface(c, bits_64, debug));
765 modules.push_back(get_initmod_metadata(c, bits_64, debug));
766 modules.push_back(get_initmod_float16_t(c, bits_64, debug));
767 modules.push_back(get_initmod_errors(c, bits_64, debug));
768 modules.push_back(get_initmod_posix_abort(c, bits_64, debug));
769 modules.push_back(get_initmod_msan_stubs(c, bits_64, debug));
770
771 // We don't want anything marked as weak for the wasm-jit runtime,
772 // so convert all of them to linkonce
773 constexpr bool allow_stripping_all_weak_functions = true;
774 link_modules(modules, t, allow_stripping_all_weak_functions);
775
776 return std::move(modules[0]);
777 }
778
779 /** Create an llvm module containing the support code for a given target. */
get_initial_module_for_target(Target t,llvm::LLVMContext * c,bool for_shared_jit_runtime,bool just_gpu)780 std::unique_ptr<llvm::Module> get_initial_module_for_target(Target t, llvm::LLVMContext *c, bool for_shared_jit_runtime, bool just_gpu) {
781 enum InitialModuleType {
782 ModuleAOT,
783 ModuleAOTNoRuntime,
784 ModuleJITShared,
785 ModuleJITInlined,
786 ModuleGPU
787 } module_type;
788
789 if (t.has_feature(Target::JIT)) {
790 if (just_gpu) {
791 module_type = ModuleGPU;
792 } else if (for_shared_jit_runtime) {
793 module_type = ModuleJITShared;
794 } else {
795 module_type = ModuleJITInlined;
796 }
797 } else if (t.has_feature(Target::NoRuntime)) {
798 module_type = ModuleAOTNoRuntime;
799 } else {
800 module_type = ModuleAOT;
801 }
802
803 // Halide::Internal::debug(0) << "Getting initial module type " << (int)module_type << "\n";
804
805 internal_assert(t.bits == 32 || t.bits == 64)
806 << "Bad target: " << t.to_string();
807 bool bits_64 = (t.bits == 64);
808 bool debug = t.has_feature(Target::Debug);
809 bool tsan = t.has_feature(Target::TSAN);
810
811 vector<std::unique_ptr<llvm::Module>> modules;
812
813 if (module_type != ModuleGPU) {
814 if (module_type != ModuleJITInlined && module_type != ModuleAOTNoRuntime) {
815 // Windows has a unique abort, but everyone else uses POSIX
816 if (t.os == Target::Windows) {
817 modules.push_back(get_initmod_windows_abort(c, bits_64, debug));
818 } else {
819 modules.push_back(get_initmod_posix_abort(c, bits_64, debug));
820 }
821
822 // OS-dependent modules
823 if (t.os == Target::Linux) {
824 modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
825 modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
826 modules.push_back(get_initmod_posix_print(c, bits_64, debug));
827 if (t.arch == Target::X86) {
828 modules.push_back(get_initmod_linux_clock(c, bits_64, debug));
829 } else {
830 modules.push_back(get_initmod_posix_clock(c, bits_64, debug));
831 }
832 modules.push_back(get_initmod_posix_io(c, bits_64, debug));
833 modules.push_back(get_initmod_linux_host_cpu_count(c, bits_64, debug));
834 modules.push_back(get_initmod_linux_yield(c, bits_64, debug));
835 if (tsan) {
836 modules.push_back(get_initmod_posix_threads_tsan(c, bits_64, debug));
837 } else {
838 modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
839 }
840 modules.push_back(get_initmod_posix_get_symbol(c, bits_64, debug));
841 } else if (t.os == Target::WebAssemblyRuntime) {
842 modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
843 modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
844 modules.push_back(get_initmod_posix_print(c, bits_64, debug));
845 modules.push_back(get_initmod_posix_clock(c, bits_64, debug));
846 modules.push_back(get_initmod_posix_io(c, bits_64, debug));
847 modules.push_back(get_initmod_linux_host_cpu_count(c, bits_64, debug));
848 modules.push_back(get_initmod_linux_yield(c, bits_64, debug));
849 modules.push_back(get_initmod_fake_thread_pool(c, bits_64, debug));
850 modules.push_back(get_initmod_fake_get_symbol(c, bits_64, debug));
851 } else if (t.os == Target::OSX) {
852 modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
853 modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
854 modules.push_back(get_initmod_posix_print(c, bits_64, debug));
855 modules.push_back(get_initmod_osx_clock(c, bits_64, debug));
856 modules.push_back(get_initmod_posix_io(c, bits_64, debug));
857 modules.push_back(get_initmod_osx_host_cpu_count(c, bits_64, debug));
858 modules.push_back(get_initmod_osx_yield(c, bits_64, debug));
859 if (tsan) {
860 modules.push_back(get_initmod_posix_threads_tsan(c, bits_64, debug));
861 } else {
862 modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
863 }
864 modules.push_back(get_initmod_osx_get_symbol(c, bits_64, debug));
865 modules.push_back(get_initmod_osx_host_cpu_count(c, bits_64, debug));
866 } else if (t.os == Target::Android) {
867 modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
868 modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
869 modules.push_back(get_initmod_posix_print(c, bits_64, debug));
870 if (t.arch == Target::ARM) {
871 modules.push_back(get_initmod_android_clock(c, bits_64, debug));
872 } else {
873 modules.push_back(get_initmod_posix_clock(c, bits_64, debug));
874 }
875 modules.push_back(get_initmod_android_io(c, bits_64, debug));
876 modules.push_back(get_initmod_android_host_cpu_count(c, bits_64, debug));
877 modules.push_back(get_initmod_linux_yield(c, bits_64, debug)); // TODO: verify
878 if (tsan) {
879 modules.push_back(get_initmod_posix_threads_tsan(c, bits_64, debug));
880 } else {
881 modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
882 }
883 modules.push_back(get_initmod_posix_get_symbol(c, bits_64, debug));
884 } else if (t.os == Target::Windows) {
885 modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
886 modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
887 modules.push_back(get_initmod_posix_print(c, bits_64, debug));
888 modules.push_back(get_initmod_windows_clock(c, bits_64, debug));
889 modules.push_back(get_initmod_windows_io(c, bits_64, debug));
890 modules.push_back(get_initmod_windows_yield(c, bits_64, debug));
891 if (tsan) {
892 modules.push_back(get_initmod_windows_threads_tsan(c, bits_64, debug));
893 } else {
894 modules.push_back(get_initmod_windows_threads(c, bits_64, debug));
895 }
896 modules.push_back(get_initmod_windows_get_symbol(c, bits_64, debug));
897 } else if (t.os == Target::IOS) {
898 modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
899 modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
900 modules.push_back(get_initmod_posix_print(c, bits_64, debug));
901 modules.push_back(get_initmod_posix_clock(c, bits_64, debug));
902 modules.push_back(get_initmod_ios_io(c, bits_64, debug));
903 modules.push_back(get_initmod_osx_host_cpu_count(c, bits_64, debug));
904 modules.push_back(get_initmod_osx_yield(c, bits_64, debug));
905 if (tsan) {
906 modules.push_back(get_initmod_posix_threads_tsan(c, bits_64, debug));
907 } else {
908 modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
909 }
910 } else if (t.os == Target::QuRT) {
911 modules.push_back(get_initmod_qurt_allocator(c, bits_64, debug));
912 modules.push_back(get_initmod_qurt_yield(c, bits_64, debug));
913 if (tsan) {
914 modules.push_back(get_initmod_qurt_threads_tsan(c, bits_64, debug));
915 } else {
916 modules.push_back(get_initmod_qurt_threads(c, bits_64, debug));
917 }
918 modules.push_back(get_initmod_qurt_init_fini(c, bits_64, debug));
919 } else if (t.os == Target::NoOS) {
920 // The OS-specific symbols provided by the modules
921 // above are expected to be provided by the containing
922 // process instead at link time. Less aggressive than
923 // NoRuntime, as OS-agnostic modules like tracing are
924 // still included below.
925 if (t.arch == Target::Hexagon) {
926 modules.push_back(get_initmod_qurt_allocator(c, bits_64, debug));
927 }
928 modules.push_back(get_initmod_fake_thread_pool(c, bits_64, debug));
929 } else if (t.os == Target::Fuchsia) {
930 modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
931 modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
932 modules.push_back(get_initmod_posix_print(c, bits_64, debug));
933 modules.push_back(get_initmod_fuchsia_clock(c, bits_64, debug));
934 modules.push_back(get_initmod_posix_io(c, bits_64, debug));
935 modules.push_back(get_initmod_fuchsia_host_cpu_count(c, bits_64, debug));
936 modules.push_back(get_initmod_fuchsia_yield(c, bits_64, debug));
937 if (tsan) {
938 modules.push_back(get_initmod_posix_threads_tsan(c, bits_64, debug));
939 } else {
940 modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
941 }
942 modules.push_back(get_initmod_posix_get_symbol(c, bits_64, debug));
943 }
944 }
945
946 if (module_type != ModuleJITShared) {
947 // The first module for inline only case has to be C/C++ compiled otherwise the
948 // datalayout is not properly setup.
949 modules.push_back(get_initmod_halide_buffer_t(c, bits_64, debug));
950 modules.push_back(get_initmod_destructors(c, bits_64, debug));
951 modules.push_back(get_initmod_pseudostack(c, bits_64, debug));
952 // Math intrinsics vary slightly across platforms
953 if (t.os == Target::Windows) {
954 if (t.bits == 32) {
955 modules.push_back(get_initmod_win32_math_ll(c));
956 } else {
957 modules.push_back(get_initmod_posix_math_ll(c));
958 }
959 } else {
960 modules.push_back(get_initmod_posix_math_ll(c));
961 }
962 }
963
964 if (module_type != ModuleJITInlined && module_type != ModuleAOTNoRuntime) {
965 // These modules are always used and shared
966 modules.push_back(get_initmod_gpu_device_selection(c, bits_64, debug));
967 if (t.arch != Target::Hexagon) {
968 // These modules don't behave correctly on a real
969 // Hexagon device (they do work in the simulator
970 // though...).
971 modules.push_back(get_initmod_tracing(c, bits_64, debug));
972 modules.push_back(get_initmod_trace_helper(c, bits_64, debug));
973 modules.push_back(get_initmod_write_debug_image(c, bits_64, debug));
974
975 // TODO: Support this module in the Hexagon backend,
976 // currently generates assert at src/HexagonOffload.cpp:279
977 modules.push_back(get_initmod_cache(c, bits_64, debug));
978 }
979 modules.push_back(get_initmod_to_string(c, bits_64, debug));
980
981 if (t.arch == Target::Hexagon ||
982 t.has_feature(Target::HVX_64) ||
983 t.has_feature(Target::HVX_128)) {
984 modules.push_back(get_initmod_alignment_128(c, bits_64, debug));
985 } else if (t.arch == Target::X86) {
986 // AVX-512 requires 64-byte alignment. Could only increase alignment
987 // if AVX-512 is in the target, but that falls afoul of linking
988 // multiple versions of a filter for different levels of x86 -- weak
989 // linking will pick one of the alignment modules unpredictably.
990 // Another way to go is to query the CPU features and align by
// 64 only if the processor has AVX-512.
992 // The choice to go 64 all the time is for simplicity and on the idea
993 // that it won't be a noticeable cost in the majority of x86 usage.
994 modules.push_back(get_initmod_alignment_64(c, bits_64, debug));
995 } else {
996 modules.push_back(get_initmod_alignment_32(c, bits_64, debug));
997 }
998
999 modules.push_back(get_initmod_allocation_cache(c, bits_64, debug));
1000 modules.push_back(get_initmod_device_interface(c, bits_64, debug));
1001 modules.push_back(get_initmod_metadata(c, bits_64, debug));
1002 modules.push_back(get_initmod_float16_t(c, bits_64, debug));
1003 modules.push_back(get_initmod_errors(c, bits_64, debug));
1004
1005 // Some environments don't support the atomics the profiler requires.
1006 if (t.arch != Target::MIPS && t.os != Target::NoOS && t.os != Target::QuRT) {
1007 if (t.os == Target::Windows) {
1008 modules.push_back(get_initmod_windows_profiler(c, bits_64, debug));
1009 } else {
1010 modules.push_back(get_initmod_profiler(c, bits_64, debug));
1011 }
1012 }
1013
1014 if (t.has_feature(Target::MSAN)) {
1015 modules.push_back(get_initmod_msan(c, bits_64, debug));
1016 } else {
1017 modules.push_back(get_initmod_msan_stubs(c, bits_64, debug));
1018 }
1019 }
1020
1021 if (module_type != ModuleJITShared) {
1022 // These modules are optional
1023 if (t.arch == Target::X86) {
1024 modules.push_back(get_initmod_x86_ll(c));
1025 }
1026 if (t.arch == Target::ARM) {
1027 if (t.bits == 64) {
1028 modules.push_back(get_initmod_aarch64_ll(c));
1029 } else if (t.has_feature(Target::ARMv7s)) {
1030 modules.push_back(get_initmod_arm_ll(c));
1031 } else if (!t.has_feature(Target::NoNEON)) {
1032 modules.push_back(get_initmod_arm_ll(c));
1033 } else {
1034 modules.push_back(get_initmod_arm_no_neon_ll(c));
1035 }
1036 }
1037 if (t.arch == Target::MIPS) {
1038 modules.push_back(get_initmod_mips_ll(c));
1039 }
1040 if (t.arch == Target::POWERPC) {
1041 modules.push_back(get_initmod_powerpc_ll(c));
1042 }
1043 if (t.arch == Target::Hexagon) {
1044 modules.push_back(get_initmod_qurt_hvx(c, bits_64, debug));
1045 if (t.has_feature(Target::HVX_64)) {
1046 modules.push_back(get_initmod_hvx_64_ll(c));
1047 } else if (t.has_feature(Target::HVX_128)) {
1048 modules.push_back(get_initmod_hvx_128_ll(c));
1049 }
1050 if (t.features_any_of({Target::HVX_v65, Target::HVX_v66})) {
1051 modules.push_back(get_initmod_qurt_hvx_vtcm(c, bits_64,
1052 debug));
1053 }
1054
1055 } else {
1056 modules.push_back(get_initmod_prefetch(c, bits_64, debug));
1057 }
1058 if (t.has_feature(Target::SSE41)) {
1059 modules.push_back(get_initmod_x86_sse41_ll(c));
1060 }
1061 if (t.has_feature(Target::AVX)) {
1062 modules.push_back(get_initmod_x86_avx_ll(c));
1063 }
1064 if (t.has_feature(Target::AVX2)) {
1065 modules.push_back(get_initmod_x86_avx2_ll(c));
1066 }
1067 if (t.has_feature(Target::Profile)) {
1068 user_assert(t.os != Target::WebAssemblyRuntime) << "The profiler cannot be used in a threadless environment.";
1069 modules.push_back(get_initmod_profiler_inlined(c, bits_64, debug));
1070 }
1071 if (t.arch == Target::WebAssembly) {
1072 modules.push_back(get_initmod_wasm_math_ll(c));
1073 }
1074 }
1075
1076 if (module_type == ModuleAOT) {
1077 // These modules are only used for AOT compilation
1078 modules.push_back(get_initmod_can_use_target(c, bits_64, debug));
1079 if (t.arch == Target::X86) {
1080 modules.push_back(get_initmod_x86_cpu_features(c, bits_64, debug));
1081 }
1082 if (t.arch == Target::ARM) {
1083 if (t.bits == 64) {
1084 modules.push_back(get_initmod_aarch64_cpu_features(c, bits_64, debug));
1085 } else {
1086 modules.push_back(get_initmod_arm_cpu_features(c, bits_64, debug));
1087 }
1088 }
1089 if (t.arch == Target::MIPS) {
1090 modules.push_back(get_initmod_mips_cpu_features(c, bits_64, debug));
1091 }
1092 if (t.arch == Target::POWERPC) {
1093 modules.push_back(get_initmod_powerpc_cpu_features(c, bits_64, debug));
1094 }
1095 if (t.arch == Target::Hexagon) {
1096 modules.push_back(get_initmod_hexagon_cpu_features(c, bits_64, debug));
1097 }
1098 if (t.arch == Target::RISCV) {
1099 modules.push_back(get_initmod_riscv_cpu_features(c, bits_64, debug));
1100 }
1101 if (t.arch == Target::WebAssembly) {
1102 modules.push_back(get_initmod_wasm_cpu_features(c, bits_64, debug));
1103 }
1104 }
1105 }
1106
1107 if (module_type == ModuleJITShared || module_type == ModuleGPU) {
1108 modules.push_back(get_initmod_module_jit_ref_count(c, bits_64, debug));
1109 } else if (module_type == ModuleAOT) {
1110 modules.push_back(get_initmod_module_aot_ref_count(c, bits_64, debug));
1111 }
1112
1113 if (module_type == ModuleAOT || module_type == ModuleGPU) {
1114 if (t.has_feature(Target::CUDA)) {
1115 if (t.os == Target::Windows) {
1116 modules.push_back(get_initmod_windows_cuda(c, bits_64, debug));
1117 } else {
1118 modules.push_back(get_initmod_cuda(c, bits_64, debug));
1119 }
1120 }
1121 if (t.has_feature(Target::OpenCL)) {
1122 if (t.os == Target::Windows) {
1123 modules.push_back(get_initmod_windows_opencl(c, bits_64, debug));
1124 } else {
1125 modules.push_back(get_initmod_opencl(c, bits_64, debug));
1126 }
1127 }
1128 if (t.has_feature(Target::OpenGL)) {
1129 modules.push_back(get_initmod_opengl(c, bits_64, debug));
1130 if (t.os == Target::Linux) {
1131 if (t.has_feature(Target::EGL)) {
1132 modules.push_back(get_initmod_opengl_egl_context(c, bits_64, debug));
1133 } else {
1134 modules.push_back(get_initmod_opengl_glx_context(c, bits_64, debug));
1135 }
1136 } else if (t.os == Target::OSX) {
1137 modules.push_back(get_initmod_osx_opengl_context(c, bits_64, debug));
1138 } else if (t.os == Target::Android) {
1139 modules.push_back(get_initmod_opengl_egl_context(c, bits_64, debug));
1140 } else {
1141 // You're on your own to provide definitions of halide_opengl_get_proc_address and halide_opengl_create_context
1142 }
1143 }
1144 if (t.has_feature(Target::OpenGLCompute)) {
1145 modules.push_back(get_initmod_openglcompute(c, bits_64, debug));
1146 if (t.os == Target::Android) {
1147 // Only platform that supports OpenGL Compute for now.
1148 modules.push_back(get_initmod_opengl_egl_context(c, bits_64, debug));
1149 } else if (t.os == Target::Linux) {
1150 if (t.has_feature(Target::EGL)) {
1151 modules.push_back(get_initmod_opengl_egl_context(c, bits_64, debug));
1152 } else {
1153 modules.push_back(get_initmod_opengl_glx_context(c, bits_64, debug));
1154 }
1155 } else if (t.os == Target::OSX) {
1156 modules.push_back(get_initmod_osx_opengl_context(c, bits_64, debug));
1157 } else {
1158 // You're on your own to provide definitions of halide_opengl_get_proc_address and halide_opengl_create_context
1159 }
1160 }
1161 if (t.has_feature(Target::Metal)) {
1162 modules.push_back(get_initmod_metal(c, bits_64, debug));
1163 if (t.arch == Target::ARM) {
1164 modules.push_back(get_initmod_metal_objc_arm(c, bits_64, debug));
1165 } else if (t.arch == Target::X86) {
1166 modules.push_back(get_initmod_metal_objc_x86(c, bits_64, debug));
1167 } else {
1168 user_error << "Metal can only be used on ARM or X86 architectures.\n";
1169 }
1170 }
1171 if (t.has_feature(Target::D3D12Compute)) {
1172 user_assert(bits_64) << "D3D12Compute target only available on 64-bit targets for now.\n";
1173 user_assert(t.os == Target::Windows) << "D3D12Compute target only available on Windows targets.\n";
1174 modules.push_back(get_initmod_windows_d3d12compute_x86(c, bits_64, debug));
1175 }
1176 if (t.arch != Target::Hexagon && t.features_any_of({Target::HVX_64, Target::HVX_128})) {
1177 modules.push_back(get_initmod_module_jit_ref_count(c, bits_64, debug));
1178 modules.push_back(get_initmod_hexagon_host(c, bits_64, debug));
1179 }
1180 if (t.has_feature(Target::HexagonDma)) {
1181 modules.push_back(get_initmod_hexagon_cache_allocator(c, bits_64, debug));
1182 modules.push_back(get_initmod_hexagon_dma(c, bits_64, debug));
1183 modules.push_back(get_initmod_hexagon_dma_pool(c, bits_64, debug));
1184 }
1185 }
1186
1187 if (module_type == ModuleAOT && t.has_feature(Target::Matlab)) {
1188 modules.push_back(get_initmod_matlab(c, bits_64, debug));
1189 }
1190
1191 if (module_type == ModuleAOTNoRuntime ||
1192 module_type == ModuleJITInlined ||
1193 t.os == Target::NoOS) {
1194 modules.push_back(get_initmod_runtime_api(c, bits_64, debug));
1195 }
1196
1197 link_modules(modules, t);
1198
1199 if (t.os == Target::Windows &&
1200 t.bits == 32 &&
1201 (t.has_feature(Target::JIT))) {
1202 undo_win32_name_mangling(modules[0].get());
1203 }
1204
1205 if (t.os == Target::Windows) {
1206 add_underscores_to_posix_calls_on_windows(modules[0].get());
1207 }
1208
1209 return std::move(modules[0]);
1210 }
1211
1212 #ifdef WITH_NVPTX
get_initial_module_for_ptx_device(Target target,llvm::LLVMContext * c)1213 std::unique_ptr<llvm::Module> get_initial_module_for_ptx_device(Target target, llvm::LLVMContext *c) {
1214 std::vector<std::unique_ptr<llvm::Module>> modules;
1215 modules.push_back(get_initmod_ptx_dev_ll(c));
1216
1217 std::unique_ptr<llvm::Module> module;
1218
1219 // This table is based on the guidance at:
1220 // http://docs.nvidia.com/cuda/libdevice-users-guide/basic-usage.html#linking-with-libdevice
1221 if (target.has_feature(Target::CUDACapability35)) {
1222 module = get_initmod_ptx_compute_35_ll(c);
1223 } else if (target.features_any_of({Target::CUDACapability32,
1224 Target::CUDACapability50})) {
1225 // For some reason sm_32 and sm_50 use libdevice 20
1226 module = get_initmod_ptx_compute_20_ll(c);
1227 } else if (target.has_feature(Target::CUDACapability30)) {
1228 module = get_initmod_ptx_compute_30_ll(c);
1229 } else {
1230 module = get_initmod_ptx_compute_20_ll(c);
1231 }
1232 modules.push_back(std::move(module));
1233
1234 link_modules(modules, target);
1235
1236 // For now, the PTX backend does not handle calling functions. So mark all functions
1237 // AvailableExternally to ensure they are inlined or deleted.
1238 for (llvm::Module::iterator iter = modules[0]->begin(); iter != modules[0]->end(); iter++) {
1239 llvm::Function &f = *iter;
1240
1241 // This is intended to set all definitions (not extern declarations)
1242 // to "available externally" which should guarantee they do not exist
1243 // after the resulting module is finalized to code. That is they must
1244 // be inlined to be used.
1245 //
1246 // However libdevice has a few routines that are marked
1247 // "noinline" which must either be changed to alow inlining or
1248 // preserved in generated code. This preserves the intent of
1249 // keeping these routines out-of-line and hence called by
1250 // not marking them AvailableExternally.
1251
1252 if (!f.isDeclaration() && !f.hasFnAttribute(llvm::Attribute::NoInline)) {
1253 f.setLinkage(llvm::GlobalValue::AvailableExternallyLinkage);
1254 }
1255 }
1256
1257 llvm::Triple triple("nvptx64--");
1258 modules[0]->setTargetTriple(triple.str());
1259
1260 llvm::DataLayout dl("e-i64:64-v16:16-v32:32-n16:32:64");
1261 modules[0]->setDataLayout(dl);
1262
1263 return std::move(modules[0]);
1264 }
1265 #endif
1266
add_bitcode_to_module(llvm::LLVMContext * context,llvm::Module & module,const std::vector<uint8_t> & bitcode,const std::string & name)1267 void add_bitcode_to_module(llvm::LLVMContext *context, llvm::Module &module,
1268 const std::vector<uint8_t> &bitcode, const std::string &name) {
1269 llvm::StringRef sb = llvm::StringRef((const char *)&bitcode[0], bitcode.size());
1270 std::unique_ptr<llvm::Module> add_in = parse_bitcode_file(sb, context, name.c_str());
1271
1272 bool failed = llvm::Linker::linkModules(module, std::move(add_in));
1273 if (failed) {
1274 internal_error << "Failure linking in additional module: " << name << "\n";
1275 }
1276 }
1277
1278 } // namespace Internal
1279 } // namespace Halide
1280