1 #include "LLVM_Runtime_Linker.h"
2 #include "LLVM_Headers.h"
3 
4 namespace Halide {
5 
6 using std::string;
7 using std::vector;
8 
9 namespace {
10 
parse_bitcode_file(llvm::StringRef buf,llvm::LLVMContext * context,const char * id)11 std::unique_ptr<llvm::Module> parse_bitcode_file(llvm::StringRef buf, llvm::LLVMContext *context, const char *id) {
12 
13     llvm::MemoryBufferRef bitcode_buffer = llvm::MemoryBufferRef(buf, id);
14 
15     auto ret_val = llvm::expectedToErrorOr(
16         llvm::parseBitcodeFile(bitcode_buffer, *context));
17     if (!ret_val) {
18         internal_error << "Could not parse built-in bitcode file " << id
19                        << " llvm error is " << ret_val.getError() << "\n";
20     }
21 
22     std::unique_ptr<llvm::Module> result(std::move(*ret_val));
23     result->setModuleIdentifier(id);
24 
25     return result;
26 }
27 
28 }  // namespace
29 
// Declares the embedded bitcode for runtime module `mod` (a byte array and
// its length, both with C linkage) and defines get_initmod_<mod>(context),
// which parses that bitcode into an llvm::Module via parse_bitcode_file.
#define DECLARE_INITMOD(mod)                                                        \
    extern "C" unsigned char halide_internal_initmod_##mod[];                       \
    extern "C" int halide_internal_initmod_##mod##_length;                          \
    std::unique_ptr<llvm::Module> get_initmod_##mod(llvm::LLVMContext *context) {   \
        const char *bitcode =                                                       \
            reinterpret_cast<const char *>(halide_internal_initmod_##mod);          \
        const llvm::StringRef sb(bitcode, halide_internal_initmod_##mod##_length);  \
        return parse_bitcode_file(sb, context, #mod);                               \
    }
38 
// Stand-in for a runtime module whose backend was not compiled into this
// build. Defines both the CPP-style getter (context, bits_64, debug) and the
// LL-style getter get_initmod_<mod>_ll(context); each one raises a user_error
// and returns a null module.
#define DECLARE_NO_INITMOD(mod)                                                        \
    std::unique_ptr<llvm::Module> get_initmod_##mod(llvm::LLVMContext *, bool, bool) { \
        user_error << "Halide was compiled without support for this target\n";         \
        return nullptr;                                                                \
    }                                                                                  \
    std::unique_ptr<llvm::Module> get_initmod_##mod##_ll(llvm::LLVMContext *) {        \
        user_error << "Halide was compiled without support for this target\n";         \
        return nullptr;                                                                \
    }
48 
// Declares the four embedded variants of a C++ runtime module (32/64-bit,
// with and without debug) and defines get_initmod_<mod>(context, bits_64,
// debug), which parses and returns the variant selected by the two flags.
#define DECLARE_CPP_INITMOD(mod)                                                                            \
    DECLARE_INITMOD(mod##_32_debug)                                                                         \
    DECLARE_INITMOD(mod##_64_debug)                                                                         \
    DECLARE_INITMOD(mod##_32)                                                                               \
    DECLARE_INITMOD(mod##_64)                                                                               \
    std::unique_ptr<llvm::Module> get_initmod_##mod(llvm::LLVMContext *context, bool bits_64, bool debug) { \
        if (bits_64) {                                                                                      \
            return debug ? get_initmod_##mod##_64_debug(context)                                            \
                         : get_initmod_##mod##_64(context);                                                 \
        }                                                                                                   \
        return debug ? get_initmod_##mod##_32_debug(context)                                                \
                     : get_initmod_##mod##_32(context);                                                     \
    }
69 
// Declares a runtime module that has a single embedded variant, exposed as
// get_initmod_<mod>_ll(context).
#define DECLARE_LL_INITMOD(mod) DECLARE_INITMOD(mod##_ll)
72 
73 // Universal CPP Initmods. Please keep sorted alphabetically.
74 DECLARE_CPP_INITMOD(alignment_128)
75 DECLARE_CPP_INITMOD(alignment_32)
76 DECLARE_CPP_INITMOD(allocation_cache)
77 DECLARE_CPP_INITMOD(alignment_64)
78 DECLARE_CPP_INITMOD(android_clock)
79 DECLARE_CPP_INITMOD(android_host_cpu_count)
80 DECLARE_CPP_INITMOD(android_io)
81 DECLARE_CPP_INITMOD(halide_buffer_t)
82 DECLARE_CPP_INITMOD(cache)
83 DECLARE_CPP_INITMOD(can_use_target)
84 DECLARE_CPP_INITMOD(cuda)
85 DECLARE_CPP_INITMOD(destructors)
86 DECLARE_CPP_INITMOD(device_interface)
87 DECLARE_CPP_INITMOD(errors)
88 DECLARE_CPP_INITMOD(fake_get_symbol)
89 DECLARE_CPP_INITMOD(fake_thread_pool)
90 DECLARE_CPP_INITMOD(float16_t)
91 DECLARE_CPP_INITMOD(fuchsia_clock)
92 DECLARE_CPP_INITMOD(fuchsia_host_cpu_count)
93 DECLARE_CPP_INITMOD(fuchsia_yield)
94 DECLARE_CPP_INITMOD(gpu_device_selection)
95 DECLARE_CPP_INITMOD(hexagon_dma)
96 DECLARE_CPP_INITMOD(hexagon_host)
97 DECLARE_CPP_INITMOD(ios_io)
98 DECLARE_CPP_INITMOD(linux_clock)
99 DECLARE_CPP_INITMOD(linux_host_cpu_count)
100 DECLARE_CPP_INITMOD(linux_yield)
101 DECLARE_CPP_INITMOD(matlab)
102 DECLARE_CPP_INITMOD(metadata)
103 DECLARE_CPP_INITMOD(module_aot_ref_count)
104 DECLARE_CPP_INITMOD(module_jit_ref_count)
105 DECLARE_CPP_INITMOD(msan)
106 DECLARE_CPP_INITMOD(msan_stubs)
107 DECLARE_CPP_INITMOD(opencl)
108 DECLARE_CPP_INITMOD(opengl)
109 DECLARE_CPP_INITMOD(openglcompute)
110 DECLARE_CPP_INITMOD(opengl_egl_context)
111 DECLARE_CPP_INITMOD(opengl_glx_context)
112 DECLARE_CPP_INITMOD(osx_clock)
113 DECLARE_CPP_INITMOD(osx_get_symbol)
114 DECLARE_CPP_INITMOD(osx_host_cpu_count)
115 DECLARE_CPP_INITMOD(osx_opengl_context)
116 DECLARE_CPP_INITMOD(osx_yield)
117 DECLARE_CPP_INITMOD(posix_abort)
118 DECLARE_CPP_INITMOD(posix_allocator)
119 DECLARE_CPP_INITMOD(posix_clock)
120 DECLARE_CPP_INITMOD(posix_error_handler)
121 DECLARE_CPP_INITMOD(posix_get_symbol)
122 DECLARE_CPP_INITMOD(posix_io)
123 DECLARE_CPP_INITMOD(posix_print)
124 DECLARE_CPP_INITMOD(posix_threads)
125 DECLARE_CPP_INITMOD(posix_threads_tsan)
126 DECLARE_CPP_INITMOD(prefetch)
127 DECLARE_CPP_INITMOD(profiler)
128 DECLARE_CPP_INITMOD(profiler_inlined)
129 DECLARE_CPP_INITMOD(pseudostack)
130 DECLARE_CPP_INITMOD(qurt_allocator)
131 DECLARE_CPP_INITMOD(hexagon_cache_allocator)
132 DECLARE_CPP_INITMOD(hexagon_dma_pool)
133 DECLARE_CPP_INITMOD(qurt_hvx)
134 DECLARE_CPP_INITMOD(qurt_hvx_vtcm)
135 DECLARE_CPP_INITMOD(qurt_init_fini)
136 DECLARE_CPP_INITMOD(qurt_threads)
137 DECLARE_CPP_INITMOD(qurt_threads_tsan)
138 DECLARE_CPP_INITMOD(qurt_yield)
139 DECLARE_CPP_INITMOD(runtime_api)
140 DECLARE_CPP_INITMOD(ssp)
141 DECLARE_CPP_INITMOD(to_string)
142 DECLARE_CPP_INITMOD(trace_helper)
143 DECLARE_CPP_INITMOD(tracing)
144 DECLARE_CPP_INITMOD(windows_clock)
145 DECLARE_CPP_INITMOD(windows_cuda)
146 DECLARE_CPP_INITMOD(windows_get_symbol)
147 DECLARE_CPP_INITMOD(windows_abort)
148 DECLARE_CPP_INITMOD(windows_io)
149 DECLARE_CPP_INITMOD(windows_opencl)
150 DECLARE_CPP_INITMOD(windows_profiler)
151 DECLARE_CPP_INITMOD(windows_threads)
152 DECLARE_CPP_INITMOD(windows_threads_tsan)
153 DECLARE_CPP_INITMOD(windows_yield)
154 DECLARE_CPP_INITMOD(write_debug_image)
155 
156 // Universal LL Initmods. Please keep sorted alphabetically.
157 DECLARE_LL_INITMOD(posix_math)
158 DECLARE_LL_INITMOD(win32_math)
159 DECLARE_LL_INITMOD(ptx_dev)
160 
// Various conditional initmods follow (both LL and CPP).
//
// Each group below is keyed on a WITH_<backend> build macro. When the macro
// is defined the real embedded modules are declared; otherwise
// DECLARE_NO_INITMOD provides getters with the same names that raise a
// user_error when called.

// Metal. The Objective-C glue modules additionally depend on the matching
// CPU backend (WITH_ARM / WITH_X86) being enabled.
#ifdef WITH_METAL
DECLARE_CPP_INITMOD(metal)
#ifdef WITH_ARM
DECLARE_CPP_INITMOD(metal_objc_arm)
#else
DECLARE_NO_INITMOD(metal_objc_arm)
#endif  // WITH_ARM
#ifdef WITH_X86
DECLARE_CPP_INITMOD(metal_objc_x86)
#else
DECLARE_NO_INITMOD(metal_objc_x86)
#endif  // WITH_X86
#else
DECLARE_NO_INITMOD(metal)
DECLARE_NO_INITMOD(metal_objc_arm)
DECLARE_NO_INITMOD(metal_objc_x86)
#endif  // WITH_METAL

// 32-bit ARM.
#ifdef WITH_ARM
DECLARE_LL_INITMOD(arm)
DECLARE_LL_INITMOD(arm_no_neon)
DECLARE_CPP_INITMOD(arm_cpu_features)
#else
DECLARE_NO_INITMOD(arm)
DECLARE_NO_INITMOD(arm_no_neon)
DECLARE_NO_INITMOD(arm_cpu_features)
#endif  // WITH_ARM

// 64-bit ARM.
#ifdef WITH_AARCH64
DECLARE_LL_INITMOD(aarch64)
DECLARE_CPP_INITMOD(aarch64_cpu_features)
#else
DECLARE_NO_INITMOD(aarch64)
DECLARE_NO_INITMOD(aarch64_cpu_features)
#endif  // WITH_AARCH64

// NVPTX compute-capability modules. Unlike the other groups there is no
// DECLARE_NO_INITMOD fallback here, so get_initmod_ptx_compute_*_ll only
// exist when WITH_NVPTX is defined.
// NOTE(review): presumably every use is guarded by the same macro - confirm.
#ifdef WITH_NVPTX
DECLARE_LL_INITMOD(ptx_compute_20)
DECLARE_LL_INITMOD(ptx_compute_30)
DECLARE_LL_INITMOD(ptx_compute_35)
#endif  // WITH_NVPTX

// Direct3D 12 compute.
#ifdef WITH_D3D12
DECLARE_CPP_INITMOD(windows_d3d12compute_x86)
#else
DECLARE_NO_INITMOD(windows_d3d12compute_x86)
#endif  // WITH_D3D12

// x86 / x86-64.
#ifdef WITH_X86
DECLARE_LL_INITMOD(x86_avx2)
DECLARE_LL_INITMOD(x86_avx)
DECLARE_LL_INITMOD(x86)
DECLARE_LL_INITMOD(x86_sse41)
DECLARE_CPP_INITMOD(x86_cpu_features)
#else
DECLARE_NO_INITMOD(x86_avx2)
DECLARE_NO_INITMOD(x86_avx)
DECLARE_NO_INITMOD(x86)
DECLARE_NO_INITMOD(x86_sse41)
DECLARE_NO_INITMOD(x86_cpu_features)
#endif  // WITH_X86

// MIPS.
#ifdef WITH_MIPS
DECLARE_LL_INITMOD(mips)
DECLARE_CPP_INITMOD(mips_cpu_features)
#else
DECLARE_NO_INITMOD(mips)
DECLARE_NO_INITMOD(mips_cpu_features)
#endif  // WITH_MIPS

// PowerPC.
#ifdef WITH_POWERPC
DECLARE_LL_INITMOD(powerpc)
DECLARE_CPP_INITMOD(powerpc_cpu_features)
#else
DECLARE_NO_INITMOD(powerpc)
DECLARE_NO_INITMOD(powerpc_cpu_features)
#endif  // WITH_POWERPC

// Hexagon (HVX 64-byte and 128-byte variants).
#ifdef WITH_HEXAGON
DECLARE_LL_INITMOD(hvx_64)
DECLARE_LL_INITMOD(hvx_128)
DECLARE_CPP_INITMOD(hexagon_cpu_features)
#else
DECLARE_NO_INITMOD(hvx_64)
DECLARE_NO_INITMOD(hvx_128)
DECLARE_NO_INITMOD(hexagon_cpu_features)
#endif  // WITH_HEXAGON

// WebAssembly.
#ifdef WITH_WEBASSEMBLY
DECLARE_CPP_INITMOD(wasm_cpu_features)
DECLARE_LL_INITMOD(wasm_math)
#else
DECLARE_NO_INITMOD(wasm_cpu_features)
DECLARE_NO_INITMOD(wasm_math)
#endif  // WITH_WEBASSEMBLY

// RISC-V. Only the cpu_features module is declared; the LL initmod lines are
// commented out in both branches.
// NOTE(review): presumably no riscv LL runtime module exists yet - confirm.
#ifdef WITH_RISCV
//DECLARE_LL_INITMOD(riscv)
DECLARE_CPP_INITMOD(riscv_cpu_features)
#else
//DECLARE_NO_INITMOD(riscv)
DECLARE_NO_INITMOD(riscv_cpu_features)
#endif  // WITH_RISCV
265 
266 namespace {
267 
get_data_layout_for_target(Target target)268 llvm::DataLayout get_data_layout_for_target(Target target) {
269     if (target.arch == Target::X86) {
270         if (target.bits == 32) {
271             if (target.os == Target::OSX) {
272 #if LLVM_VERSION >= 100
273                 return llvm::DataLayout("e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:128-n8:16:32-S128");
274 #else
275                 return llvm::DataLayout("e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128");
276 #endif
277             } else if (target.os == Target::IOS) {
278 #if LLVM_VERSION >= 100
279                 return llvm::DataLayout("e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:128-n8:16:32-S128");
280 #else
281                 return llvm::DataLayout("e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128");
282 #endif
283             } else if (target.os == Target::Windows && !target.has_feature(Target::JIT)) {
284 #if LLVM_VERSION >= 100
285                 return llvm::DataLayout("e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32");
286 #else
287                 return llvm::DataLayout("e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32");
288 #endif
289             } else if (target.os == Target::Windows) {
290 #if LLVM_VERSION >= 100
291                 return llvm::DataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32");
292 #else
293                 return llvm::DataLayout("e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32");
294 #endif
295             } else {
296                 // Linux/Android
297 #if LLVM_VERSION >= 100
298                 return llvm::DataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128");
299 #else
300                 return llvm::DataLayout("e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128");
301 #endif
302             }
303         } else {  // 64-bit
304             if (target.os == Target::OSX) {
305 #if LLVM_VERSION >= 100
306                 return llvm::DataLayout("e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128");
307 #else
308                 return llvm::DataLayout("e-m:o-i64:64-f80:128-n8:16:32:64-S128");
309 #endif
310             } else if (target.os == Target::IOS) {
311 #if LLVM_VERSION >= 100
312                 return llvm::DataLayout("e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128");
313 #else
314                 return llvm::DataLayout("e-m:o-i64:64-f80:128-n8:16:32:64-S128");
315 #endif
316             } else if (target.os == Target::Windows && !target.has_feature(Target::JIT)) {
317 #if LLVM_VERSION >= 100
318                 return llvm::DataLayout("e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128");
319 #else
320                 return llvm::DataLayout("e-m:w-i64:64-f80:128-n8:16:32:64-S128");
321 #endif
322             } else if (target.os == Target::Windows) {
323 #if LLVM_VERSION >= 100
324                 return llvm::DataLayout("e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128");
325 #else
326                 return llvm::DataLayout("e-m:e-i64:64-f80:128-n8:16:32:64-S128");
327 #endif
328             } else {
329 #if LLVM_VERSION >= 100
330                 return llvm::DataLayout("e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128");
331 #else
332                 return llvm::DataLayout("e-m:e-i64:64-f80:128-n8:16:32:64-S128");
333 #endif
334             }
335         }
336     } else if (target.arch == Target::ARM) {
337         if (target.bits == 32) {
338             if (target.os == Target::IOS) {
339                 return llvm::DataLayout("e-m:o-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32");
340             } else {
341                 return llvm::DataLayout("e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64");
342             }
343         } else {  // 64-bit
344             if (target.os == Target::IOS) {
345                 return llvm::DataLayout("e-m:o-i64:64-i128:128-n32:64-S128");
346             } else if (target.os == Target::OSX) {
347                 return llvm::DataLayout("e-m:o-i64:64-i128:128-n32:64-S128");
348             } else if (target.os == Target::Windows) {
349                 return llvm::DataLayout("e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128");
350             } else {
351                 return llvm::DataLayout("e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128");
352             }
353         }
354     } else if (target.arch == Target::MIPS) {
355         if (target.bits == 32) {
356             return llvm::DataLayout("e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64");
357         } else {
358             return llvm::DataLayout("e-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128");
359         }
360     } else if (target.arch == Target::POWERPC) {
361         if (target.bits == 32) {
362             return llvm::DataLayout("e-m:e-i32:32-n32");
363         } else {
364             return llvm::DataLayout("e-m:e-i64:64-n32:64");
365         }
366     } else if (target.arch == Target::Hexagon) {
367         return llvm::DataLayout(
368             "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8"
369             "-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048");
370     } else if (target.arch == Target::WebAssembly) {
371         if (target.bits == 32) {
372             return llvm::DataLayout("e-m:e-p:32:32-i64:64-n32:64-S128");
373         } else {
374             return llvm::DataLayout("e-m:e-p:64:64-i64:64-n32:64-S128");
375         }
376     } else if (target.arch == Target::RISCV) {
377         // TODO: Valdidate this data layout is correct for RISCV. Assumption is it is like MIPS.
378         if (target.bits == 32) {
379             return llvm::DataLayout("e-m:e-p:32:32-i64:64-n32-S128");
380         } else {
381             return llvm::DataLayout("e-m:e-p:64:64-i64:64-i128:128-n64-S128");
382         }
383     } else {
384         internal_error << "Bad target arch: " << target.arch << "\n";
385         return llvm::DataLayout("unreachable");
386     }
387 }
388 
389 }  // namespace
390 
391 namespace Internal {
392 
/** Build the llvm::Triple (arch, vendor, OS, environment and object
 * format) that corresponds to a Halide Target.
 *
 * The triple starts out default-constructed and only the fields relevant to
 * the target's arch/OS combination are set. Unsupported bit widths and OS
 * choices are reported through user_assert / user_error; an unrecognized
 * architecture is reported through internal_error.
 *
 * The order of the setter calls is kept exactly as written.
 * NOTE(review): llvm::Triple setters appear to be order-sensitive (e.g.
 * environment followed by object format) - confirm before reordering. */
llvm::Triple get_triple_for_target(const Target &target) {
    llvm::Triple triple;

    if (target.arch == Target::X86) {
        if (target.bits == 32) {
            triple.setArch(llvm::Triple::x86);
        } else {
            user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
            triple.setArch(llvm::Triple::x86_64);
        }

        // Note: there is no final else here, so an OS not listed below
        // leaves the OS-related fields untouched and raises no error.
        if (target.os == Target::Linux) {
            triple.setOS(llvm::Triple::Linux);
            triple.setEnvironment(llvm::Triple::GNU);
        } else if (target.os == Target::OSX) {
            triple.setVendor(llvm::Triple::Apple);
            triple.setOS(llvm::Triple::MacOSX);
        } else if (target.os == Target::Windows) {
            triple.setVendor(llvm::Triple::PC);
            triple.setOS(llvm::Triple::Win32);
            triple.setEnvironment(llvm::Triple::MSVC);
            if (target.has_feature(Target::JIT)) {
                // Use ELF for jitting
                triple.setObjectFormat(llvm::Triple::ELF);
            }
        } else if (target.os == Target::Android) {
            triple.setOS(llvm::Triple::Linux);
            triple.setEnvironment(llvm::Triple::Android);
        } else if (target.os == Target::IOS) {
            // X86 on iOS for the simulator
            triple.setVendor(llvm::Triple::Apple);
            triple.setOS(llvm::Triple::IOS);
        } else if (target.os == Target::Fuchsia) {
            triple.setOS(llvm::Triple::Fuchsia);
        }
    } else if (target.arch == Target::ARM) {
        if (target.bits == 32) {
            // The ARMv7s feature is expressed through the arch name rather
            // than an llvm::Triple arch enum value.
            if (target.has_feature(Target::ARMv7s)) {
                triple.setArchName("armv7s");
            } else {
                triple.setArch(llvm::Triple::arm);
            }
        } else {
            user_assert(target.bits == 64) << "Target bits must be 32 or 64\n";
            // 64-bit ARM is only available when the AArch64 backend was
            // compiled in.
#ifdef WITH_AARCH64
            triple.setArch(llvm::Triple::aarch64);
#else
            user_error << "AArch64 llvm target not enabled in this build of Halide\n";
#endif
        }

        if (target.os == Target::Android) {
            triple.setOS(llvm::Triple::Linux);
            triple.setEnvironment(llvm::Triple::EABI);
        } else if (target.os == Target::IOS) {
            triple.setOS(llvm::Triple::IOS);
            triple.setVendor(llvm::Triple::Apple);
        } else if (target.os == Target::Linux) {
            // Applied to both the 32-bit and the 64-bit case.
            triple.setOS(llvm::Triple::Linux);
            triple.setEnvironment(llvm::Triple::GNUEABIHF);
        } else if (target.os == Target::Windows) {
            user_assert(target.bits == 64) << "Windows ARM targets must be 64-bit.\n";
            triple.setVendor(llvm::Triple::PC);
            triple.setOS(llvm::Triple::Win32);
            triple.setEnvironment(llvm::Triple::MSVC);
            if (target.has_feature(Target::JIT)) {
                // TODO(shoaibkamil): figure out a way to test this.
                // Currently blocked by https://github.com/halide/Halide/issues/5040
                user_error << "No JIT support for this OS/CPU combination yet.\n";
            }
        } else if (target.os == Target::Fuchsia) {
            triple.setOS(llvm::Triple::Fuchsia);
        } else if (target.os == Target::OSX) {
            triple.setVendor(llvm::Triple::Apple);
            triple.setOS(llvm::Triple::MacOSX);
            // Overrides the arch name chosen above, regardless of target.bits.
            triple.setArchName("arm64");
        } else if (target.os == Target::NoOS) {
            // For bare-metal environments

        } else {
            user_error << "No arm support for this OS\n";
        }
    } else if (target.arch == Target::MIPS) {
        // Currently MIPS support is only little-endian.
        if (target.bits == 32) {
            triple.setArch(llvm::Triple::mipsel);
        } else {
            user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
            triple.setArch(llvm::Triple::mips64el);
        }

        // Android is the only OS accepted for MIPS.
        if (target.os == Target::Android) {
            triple.setOS(llvm::Triple::Linux);
            triple.setEnvironment(llvm::Triple::Android);
        } else {
            user_error << "No mips support for this OS\n";
        }
    } else if (target.arch == Target::POWERPC) {
#ifdef WITH_POWERPC
        // Only ppc*-unknown-linux-gnu are supported for the time being.
        user_assert(target.os == Target::Linux) << "PowerPC target is Linux-only.\n";
        triple.setVendor(llvm::Triple::UnknownVendor);
        triple.setOS(llvm::Triple::Linux);
        triple.setEnvironment(llvm::Triple::GNU);
        if (target.bits == 32) {
            triple.setArch(llvm::Triple::ppc);
        } else {
            // Currently POWERPC64 support is only little-endian.
            user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
            triple.setArch(llvm::Triple::ppc64le);
        }
#else
        user_error << "PowerPC llvm target not enabled in this build of Halide\n";
#endif
    } else if (target.arch == Target::Hexagon) {
        // No OS is set for Hexagon; target.bits and target.os are not consulted.
        triple.setVendor(llvm::Triple::UnknownVendor);
        triple.setArch(llvm::Triple::hexagon);
        triple.setObjectFormat(llvm::Triple::ELF);
    } else if (target.arch == Target::WebAssembly) {
        triple.setVendor(llvm::Triple::UnknownVendor);
        // Any bit width other than 32 selects wasm64; there is no assert on
        // target.bits here, unlike the other architectures.
        if (target.bits == 32) {
            triple.setArch(llvm::Triple::wasm32);
        } else {
            triple.setArch(llvm::Triple::wasm64);
        }
        triple.setObjectFormat(llvm::Triple::Wasm);
    } else if (target.arch == Target::RISCV) {
        if (target.bits == 32) {
            triple.setArch(llvm::Triple::riscv32);
        } else {
            user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
            triple.setArch(llvm::Triple::riscv64);
        }

        if (target.os == Target::Linux) {
            triple.setOS(llvm::Triple::Linux);
            // TODO: Check what options there are here.
            triple.setEnvironment(llvm::Triple::GNUEABIHF);
        } else if (target.os == Target::NoOS) {
            // for baremetal environment
        } else {
            user_error << "No RISCV support for this OS\n";
        }
    } else {
        internal_error << "Bad target arch: " << target.arch << "\n";
    }

    return triple;
}
542 
543 }  // namespace Internal
544 
545 namespace {
546 
convert_weak_to_linkonce(llvm::GlobalValue & gv)547 void convert_weak_to_linkonce(llvm::GlobalValue &gv) {
548     llvm::GlobalValue::LinkageTypes linkage = gv.getLinkage();
549     if (linkage == llvm::GlobalValue::WeakAnyLinkage) {
550         gv.setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
551     } else if (linkage == llvm::GlobalValue::WeakODRLinkage) {
552         gv.setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
553     } else if (linkage == llvm::GlobalValue::ExternalWeakLinkage) {
554         gv.setLinkage(llvm::GlobalValue::ExternalLinkage);
555     }
556 }
557 
558 // Link all modules together and with the result in modules[0], all
559 // other input modules are destroyed. Sets the datalayout and target
560 // triple appropriately for the target.
link_modules(std::vector<std::unique_ptr<llvm::Module>> & modules,Target t,bool allow_stripping_all_weak_functions=false)561 void link_modules(std::vector<std::unique_ptr<llvm::Module>> &modules, Target t,
562                   bool allow_stripping_all_weak_functions = false) {
563     llvm::DataLayout data_layout = get_data_layout_for_target(t);
564     llvm::Triple triple = Internal::get_triple_for_target(t);
565 
566     // Set the layout and triple on the modules before linking, so
567     // llvm doesn't complain while combining them.
568     for (size_t i = 0; i < modules.size(); i++) {
569         if (t.os == Target::Windows &&
570             !Internal::starts_with(modules[i]->getName().str(), "windows_")) {
571             // When compiling for windows, all wchars are
572             // 16-bit. Generic modules may have it set to 32-bit. Drop
573             // any module flags on the generic modules and use the
574             // more correct ones on the windows-specific modules to
575             // avoid a conflict. This is safe as long as the generic
576             // modules never actually use a wchar.
577             if (auto *module_flags = modules[i]->getModuleFlagsMetadata()) {
578                 modules[i]->eraseNamedMetadata(module_flags);
579             }
580         }
581         modules[i]->setDataLayout(data_layout);
582         modules[i]->setTargetTriple(triple.str());
583     }
584 
585     // Link them all together
586     for (size_t i = 1; i < modules.size(); i++) {
587         bool failed = llvm::Linker::linkModules(*modules[0],
588                                                 std::move(modules[i]));
589         if (failed) {
590             internal_error << "Failure linking initial modules\n";
591         }
592     }
593 
594     // Now re-mark most weak symbols as linkonce. They are only weak to
595     // prevent llvm from stripping them during initial module
596     // assembly. This means they can be stripped later.
597 
598     // The symbols that we might want to call as a user even if not
599     // used in the Halide-generated code must remain weak. This is
600     // handled automatically by assuming any symbol starting with
601     // "halide_" that is weak will be retained. There are a few
602     // symbols for which this convention is not followed and these are
603     // in this set.
604     const std::set<string> retain = {"__stack_chk_guard",
605                                      "__stack_chk_fail"};
606 
607     // Enumerate the global variables.
608     for (auto &gv : modules[0]->globals()) {
609         // No variables are part of the public interface (even the ones labelled halide_)
610         convert_weak_to_linkonce(gv);
611     }
612 
613     // Enumerate the functions.
614     for (auto &f : *modules[0]) {
615         const std::string f_name = Internal::get_llvm_function_name(f);
616 
617         bool is_halide_extern_c_sym = Internal::starts_with(f_name, "halide_");
618         internal_assert(!is_halide_extern_c_sym || f.isWeakForLinker() || f.isDeclaration())
619             << " for function " << f_name << "\n";
620 
621         // We never want *any* Function marked as external-weak here;
622         // convert all of those to plain external.
623         if (f.getLinkage() == llvm::GlobalValue::ExternalWeakLinkage) {
624             f.setLinkage(llvm::GlobalValue::ExternalLinkage);
625         } else {
626             const bool can_strip = !is_halide_extern_c_sym && retain.count(f_name) == 0;
627             if (can_strip || allow_stripping_all_weak_functions) {
628                 convert_weak_to_linkonce(f);
629             }
630         }
631 
632         // Windows requires every symbol that's going to get merged
633         // has a comdat that specifies how. The linkage type alone
634         // isn't enough.
635         if (t.os == Target::Windows && f.isWeakForLinker()) {
636             llvm::Comdat *comdat = modules[0]->getOrInsertComdat(f_name);
637             comdat->setSelectionKind(llvm::Comdat::Any);
638             f.setComdat(comdat);
639         }
640     }
641 
642     // Now remove the force-usage global that prevented clang from
643     // dropping functions from the initial module.
644     llvm::GlobalValue *llvm_used = modules[0]->getNamedGlobal("llvm.used");
645     if (llvm_used) {
646         llvm_used->eraseFromParent();
647     }
648 
649     // Also drop the dummy runtime api usage. We only needed it so
650     // that the declarations are retained in the module during the
651     // linking procedure above.
652     llvm::GlobalValue *runtime_api =
653         modules[0]->getNamedGlobal("halide_runtime_api_functions");
654     if (runtime_api) {
655         runtime_api->eraseFromParent();
656     }
657 }
658 
659 }  // namespace
660 
661 namespace Internal {
662 
663 /** When JIT-compiling on 32-bit windows, we need to rewrite calls
664  *  to name-mangled win32 api calls to non-name-mangled versions.
665  */
undo_win32_name_mangling(llvm::Module * m)666 void undo_win32_name_mangling(llvm::Module *m) {
667     llvm::IRBuilder<> builder(m->getContext());
668     // For every function prototype...
669     for (llvm::Module::iterator iter = m->begin(); iter != m->end(); ++iter) {
670         llvm::Function &f = *iter;
671         string n = get_llvm_function_name(f);
672         // if it's a __stdcall call that starts with \01_, then we're making a win32 api call
673         if (f.getCallingConv() == llvm::CallingConv::X86_StdCall &&
674             f.empty() &&
675             n.size() > 2 && n[0] == 1 && n[1] == '_') {
676 
677             // Unmangle the name.
678             string unmangled_name = n.substr(2);
679             size_t at = unmangled_name.rfind('@');
680             unmangled_name = unmangled_name.substr(0, at);
681 
682             // Extern declare the unmangled version.
683             llvm::Function *unmangled = llvm::Function::Create(f.getFunctionType(), f.getLinkage(), unmangled_name, m);
684             unmangled->setCallingConv(f.getCallingConv());
685 
686             // Add a body to the mangled version that calls the unmangled version.
687             llvm::BasicBlock *block = llvm::BasicBlock::Create(m->getContext(), "entry", &f);
688             builder.SetInsertPoint(block);
689 
690             vector<llvm::Value *> args;
691             for (auto &arg : f.args()) {
692                 args.push_back(&arg);
693             }
694 
695             llvm::CallInst *c = builder.CreateCall(unmangled, args);
696             c->setCallingConv(f.getCallingConv());
697 
698             if (f.getReturnType()->isVoidTy()) {
699                 builder.CreateRetVoid();
700             } else {
701                 builder.CreateRet(c);
702             }
703         }
704     }
705 }
706 
add_underscore_to_posix_call(llvm::CallInst * call,llvm::Function * fn,llvm::Module * m)707 void add_underscore_to_posix_call(llvm::CallInst *call, llvm::Function *fn, llvm::Module *m) {
708     string new_name = "_" + fn->getName().str();
709     llvm::Function *alt = m->getFunction(new_name);
710     if (!alt) {
711         alt = llvm::Function::Create(fn->getFunctionType(),
712                                      llvm::GlobalValue::ExternalLinkage,
713                                      new_name, m);
714     }
715     internal_assert(alt->getName() == new_name);
716     call->setCalledFunction(alt);
717 }
718 
719 /** Windows uses _close, _open, _write, etc instead of the posix
720  * names. Defining stubs that redirect causes mis-compilations inside
721  * of mcjit, so we just rewrite uses of these functions to include an
722  * underscore. */
add_underscores_to_posix_calls_on_windows(llvm::Module * m)723 void add_underscores_to_posix_calls_on_windows(llvm::Module *m) {
724     string posix_fns[] = {"vsnprintf", "open", "close", "write", "fileno"};
725 
726     string *posix_fns_begin = posix_fns;
727     string *posix_fns_end = posix_fns + sizeof(posix_fns) / sizeof(posix_fns[0]);
728 
729     for (auto &fn : *m) {
730         for (auto &basic_block : fn) {
731             for (auto &instruction : basic_block) {
732                 if (llvm::CallInst *call = llvm::dyn_cast<llvm::CallInst>(&instruction)) {
733                     if (llvm::Function *called_fn = call->getCalledFunction()) {
734                         if (std::find(posix_fns_begin, posix_fns_end, called_fn->getName()) != posix_fns_end) {
735                             add_underscore_to_posix_call(call, called_fn, m);
736                         }
737                     }
738                 }
739             }
740         }
741     }
742 }
743 
link_with_wasm_jit_runtime(llvm::LLVMContext * c,const Target & t,std::unique_ptr<llvm::Module> extra_module)744 std::unique_ptr<llvm::Module> link_with_wasm_jit_runtime(llvm::LLVMContext *c, const Target &t,
745                                                          std::unique_ptr<llvm::Module> extra_module) {
746     bool bits_64 = (t.bits == 64);
747     bool debug = t.has_feature(Target::Debug);
748 
749     // We only need to include things that must be linked in as callable entrypoints;
750     // things that are 'alwaysinline' can be included here but are unnecessary.
751     vector<std::unique_ptr<llvm::Module>> modules;
752     modules.push_back(std::move(extra_module));
753     modules.push_back(get_initmod_fake_thread_pool(c, bits_64, debug));
754     modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
755     modules.push_back(get_initmod_halide_buffer_t(c, bits_64, debug));
756     modules.push_back(get_initmod_destructors(c, bits_64, debug));
757     // These two aren't necessary, since they are 100% alwaysinline
758     // modules.push_back(get_initmod_posix_math_ll(c));
759     // modules.push_back(get_initmod_wasm_math_ll(c));
760     modules.push_back(get_initmod_tracing(c, bits_64, debug));
761     modules.push_back(get_initmod_cache(c, bits_64, debug));
762     modules.push_back(get_initmod_to_string(c, bits_64, debug));
763     modules.push_back(get_initmod_alignment_32(c, bits_64, debug));
764     modules.push_back(get_initmod_device_interface(c, bits_64, debug));
765     modules.push_back(get_initmod_metadata(c, bits_64, debug));
766     modules.push_back(get_initmod_float16_t(c, bits_64, debug));
767     modules.push_back(get_initmod_errors(c, bits_64, debug));
768     modules.push_back(get_initmod_posix_abort(c, bits_64, debug));
769     modules.push_back(get_initmod_msan_stubs(c, bits_64, debug));
770 
771     // We don't want anything marked as weak for the wasm-jit runtime,
772     // so convert all of them to linkonce
773     constexpr bool allow_stripping_all_weak_functions = true;
774     link_modules(modules, t, allow_stripping_all_weak_functions);
775 
776     return std::move(modules[0]);
777 }
778 
779 /** Create an llvm module containing the support code for a given target. */
get_initial_module_for_target(Target t,llvm::LLVMContext * c,bool for_shared_jit_runtime,bool just_gpu)780 std::unique_ptr<llvm::Module> get_initial_module_for_target(Target t, llvm::LLVMContext *c, bool for_shared_jit_runtime, bool just_gpu) {
781     enum InitialModuleType {
782         ModuleAOT,
783         ModuleAOTNoRuntime,
784         ModuleJITShared,
785         ModuleJITInlined,
786         ModuleGPU
787     } module_type;
788 
789     if (t.has_feature(Target::JIT)) {
790         if (just_gpu) {
791             module_type = ModuleGPU;
792         } else if (for_shared_jit_runtime) {
793             module_type = ModuleJITShared;
794         } else {
795             module_type = ModuleJITInlined;
796         }
797     } else if (t.has_feature(Target::NoRuntime)) {
798         module_type = ModuleAOTNoRuntime;
799     } else {
800         module_type = ModuleAOT;
801     }
802 
803     //    Halide::Internal::debug(0) << "Getting initial module type " << (int)module_type << "\n";
804 
805     internal_assert(t.bits == 32 || t.bits == 64)
806         << "Bad target: " << t.to_string();
807     bool bits_64 = (t.bits == 64);
808     bool debug = t.has_feature(Target::Debug);
809     bool tsan = t.has_feature(Target::TSAN);
810 
811     vector<std::unique_ptr<llvm::Module>> modules;
812 
813     if (module_type != ModuleGPU) {
814         if (module_type != ModuleJITInlined && module_type != ModuleAOTNoRuntime) {
815             // Windows has a unique abort, but everyone else uses POSIX
816             if (t.os == Target::Windows) {
817                 modules.push_back(get_initmod_windows_abort(c, bits_64, debug));
818             } else {
819                 modules.push_back(get_initmod_posix_abort(c, bits_64, debug));
820             }
821 
822             // OS-dependent modules
823             if (t.os == Target::Linux) {
824                 modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
825                 modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
826                 modules.push_back(get_initmod_posix_print(c, bits_64, debug));
827                 if (t.arch == Target::X86) {
828                     modules.push_back(get_initmod_linux_clock(c, bits_64, debug));
829                 } else {
830                     modules.push_back(get_initmod_posix_clock(c, bits_64, debug));
831                 }
832                 modules.push_back(get_initmod_posix_io(c, bits_64, debug));
833                 modules.push_back(get_initmod_linux_host_cpu_count(c, bits_64, debug));
834                 modules.push_back(get_initmod_linux_yield(c, bits_64, debug));
835                 if (tsan) {
836                     modules.push_back(get_initmod_posix_threads_tsan(c, bits_64, debug));
837                 } else {
838                     modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
839                 }
840                 modules.push_back(get_initmod_posix_get_symbol(c, bits_64, debug));
841             } else if (t.os == Target::WebAssemblyRuntime) {
842                 modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
843                 modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
844                 modules.push_back(get_initmod_posix_print(c, bits_64, debug));
845                 modules.push_back(get_initmod_posix_clock(c, bits_64, debug));
846                 modules.push_back(get_initmod_posix_io(c, bits_64, debug));
847                 modules.push_back(get_initmod_linux_host_cpu_count(c, bits_64, debug));
848                 modules.push_back(get_initmod_linux_yield(c, bits_64, debug));
849                 modules.push_back(get_initmod_fake_thread_pool(c, bits_64, debug));
850                 modules.push_back(get_initmod_fake_get_symbol(c, bits_64, debug));
851             } else if (t.os == Target::OSX) {
852                 modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
853                 modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
854                 modules.push_back(get_initmod_posix_print(c, bits_64, debug));
855                 modules.push_back(get_initmod_osx_clock(c, bits_64, debug));
856                 modules.push_back(get_initmod_posix_io(c, bits_64, debug));
857                 modules.push_back(get_initmod_osx_host_cpu_count(c, bits_64, debug));
858                 modules.push_back(get_initmod_osx_yield(c, bits_64, debug));
859                 if (tsan) {
860                     modules.push_back(get_initmod_posix_threads_tsan(c, bits_64, debug));
861                 } else {
862                     modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
863                 }
864                 modules.push_back(get_initmod_osx_get_symbol(c, bits_64, debug));
865                 modules.push_back(get_initmod_osx_host_cpu_count(c, bits_64, debug));
866             } else if (t.os == Target::Android) {
867                 modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
868                 modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
869                 modules.push_back(get_initmod_posix_print(c, bits_64, debug));
870                 if (t.arch == Target::ARM) {
871                     modules.push_back(get_initmod_android_clock(c, bits_64, debug));
872                 } else {
873                     modules.push_back(get_initmod_posix_clock(c, bits_64, debug));
874                 }
875                 modules.push_back(get_initmod_android_io(c, bits_64, debug));
876                 modules.push_back(get_initmod_android_host_cpu_count(c, bits_64, debug));
877                 modules.push_back(get_initmod_linux_yield(c, bits_64, debug));  // TODO: verify
878                 if (tsan) {
879                     modules.push_back(get_initmod_posix_threads_tsan(c, bits_64, debug));
880                 } else {
881                     modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
882                 }
883                 modules.push_back(get_initmod_posix_get_symbol(c, bits_64, debug));
884             } else if (t.os == Target::Windows) {
885                 modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
886                 modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
887                 modules.push_back(get_initmod_posix_print(c, bits_64, debug));
888                 modules.push_back(get_initmod_windows_clock(c, bits_64, debug));
889                 modules.push_back(get_initmod_windows_io(c, bits_64, debug));
890                 modules.push_back(get_initmod_windows_yield(c, bits_64, debug));
891                 if (tsan) {
892                     modules.push_back(get_initmod_windows_threads_tsan(c, bits_64, debug));
893                 } else {
894                     modules.push_back(get_initmod_windows_threads(c, bits_64, debug));
895                 }
896                 modules.push_back(get_initmod_windows_get_symbol(c, bits_64, debug));
897             } else if (t.os == Target::IOS) {
898                 modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
899                 modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
900                 modules.push_back(get_initmod_posix_print(c, bits_64, debug));
901                 modules.push_back(get_initmod_posix_clock(c, bits_64, debug));
902                 modules.push_back(get_initmod_ios_io(c, bits_64, debug));
903                 modules.push_back(get_initmod_osx_host_cpu_count(c, bits_64, debug));
904                 modules.push_back(get_initmod_osx_yield(c, bits_64, debug));
905                 if (tsan) {
906                     modules.push_back(get_initmod_posix_threads_tsan(c, bits_64, debug));
907                 } else {
908                     modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
909                 }
910             } else if (t.os == Target::QuRT) {
911                 modules.push_back(get_initmod_qurt_allocator(c, bits_64, debug));
912                 modules.push_back(get_initmod_qurt_yield(c, bits_64, debug));
913                 if (tsan) {
914                     modules.push_back(get_initmod_qurt_threads_tsan(c, bits_64, debug));
915                 } else {
916                     modules.push_back(get_initmod_qurt_threads(c, bits_64, debug));
917                 }
918                 modules.push_back(get_initmod_qurt_init_fini(c, bits_64, debug));
919             } else if (t.os == Target::NoOS) {
920                 // The OS-specific symbols provided by the modules
921                 // above are expected to be provided by the containing
922                 // process instead at link time. Less aggressive than
923                 // NoRuntime, as OS-agnostic modules like tracing are
924                 // still included below.
925                 if (t.arch == Target::Hexagon) {
926                     modules.push_back(get_initmod_qurt_allocator(c, bits_64, debug));
927                 }
928                 modules.push_back(get_initmod_fake_thread_pool(c, bits_64, debug));
929             } else if (t.os == Target::Fuchsia) {
930                 modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
931                 modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
932                 modules.push_back(get_initmod_posix_print(c, bits_64, debug));
933                 modules.push_back(get_initmod_fuchsia_clock(c, bits_64, debug));
934                 modules.push_back(get_initmod_posix_io(c, bits_64, debug));
935                 modules.push_back(get_initmod_fuchsia_host_cpu_count(c, bits_64, debug));
936                 modules.push_back(get_initmod_fuchsia_yield(c, bits_64, debug));
937                 if (tsan) {
938                     modules.push_back(get_initmod_posix_threads_tsan(c, bits_64, debug));
939                 } else {
940                     modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
941                 }
942                 modules.push_back(get_initmod_posix_get_symbol(c, bits_64, debug));
943             }
944         }
945 
946         if (module_type != ModuleJITShared) {
947             // The first module for inline only case has to be C/C++ compiled otherwise the
948             // datalayout is not properly setup.
949             modules.push_back(get_initmod_halide_buffer_t(c, bits_64, debug));
950             modules.push_back(get_initmod_destructors(c, bits_64, debug));
951             modules.push_back(get_initmod_pseudostack(c, bits_64, debug));
952             // Math intrinsics vary slightly across platforms
953             if (t.os == Target::Windows) {
954                 if (t.bits == 32) {
955                     modules.push_back(get_initmod_win32_math_ll(c));
956                 } else {
957                     modules.push_back(get_initmod_posix_math_ll(c));
958                 }
959             } else {
960                 modules.push_back(get_initmod_posix_math_ll(c));
961             }
962         }
963 
964         if (module_type != ModuleJITInlined && module_type != ModuleAOTNoRuntime) {
965             // These modules are always used and shared
966             modules.push_back(get_initmod_gpu_device_selection(c, bits_64, debug));
967             if (t.arch != Target::Hexagon) {
968                 // These modules don't behave correctly on a real
969                 // Hexagon device (they do work in the simulator
970                 // though...).
971                 modules.push_back(get_initmod_tracing(c, bits_64, debug));
972                 modules.push_back(get_initmod_trace_helper(c, bits_64, debug));
973                 modules.push_back(get_initmod_write_debug_image(c, bits_64, debug));
974 
975                 // TODO: Support this module in the Hexagon backend,
976                 // currently generates assert at src/HexagonOffload.cpp:279
977                 modules.push_back(get_initmod_cache(c, bits_64, debug));
978             }
979             modules.push_back(get_initmod_to_string(c, bits_64, debug));
980 
981             if (t.arch == Target::Hexagon ||
982                 t.has_feature(Target::HVX_64) ||
983                 t.has_feature(Target::HVX_128)) {
984                 modules.push_back(get_initmod_alignment_128(c, bits_64, debug));
985             } else if (t.arch == Target::X86) {
986                 // AVX-512 requires 64-byte alignment. Could only increase alignment
987                 // if AVX-512 is in the target, but that falls afoul of linking
988                 // multiple versions of a filter for different levels of x86 -- weak
989                 // linking will pick one of the alignment modules unpredictably.
990                 // Another way to go is to query the CPU features and align by
991                 // 64 oonly if the procesor has AVX-512.
992                 // The choice to go 64 all the time is for simplicity and on the idea
993                 // that it won't be a noticeable cost in the majority of x86 usage.
994                 modules.push_back(get_initmod_alignment_64(c, bits_64, debug));
995             } else {
996                 modules.push_back(get_initmod_alignment_32(c, bits_64, debug));
997             }
998 
999             modules.push_back(get_initmod_allocation_cache(c, bits_64, debug));
1000             modules.push_back(get_initmod_device_interface(c, bits_64, debug));
1001             modules.push_back(get_initmod_metadata(c, bits_64, debug));
1002             modules.push_back(get_initmod_float16_t(c, bits_64, debug));
1003             modules.push_back(get_initmod_errors(c, bits_64, debug));
1004 
1005             // Some environments don't support the atomics the profiler requires.
1006             if (t.arch != Target::MIPS && t.os != Target::NoOS && t.os != Target::QuRT) {
1007                 if (t.os == Target::Windows) {
1008                     modules.push_back(get_initmod_windows_profiler(c, bits_64, debug));
1009                 } else {
1010                     modules.push_back(get_initmod_profiler(c, bits_64, debug));
1011                 }
1012             }
1013 
1014             if (t.has_feature(Target::MSAN)) {
1015                 modules.push_back(get_initmod_msan(c, bits_64, debug));
1016             } else {
1017                 modules.push_back(get_initmod_msan_stubs(c, bits_64, debug));
1018             }
1019         }
1020 
1021         if (module_type != ModuleJITShared) {
1022             // These modules are optional
1023             if (t.arch == Target::X86) {
1024                 modules.push_back(get_initmod_x86_ll(c));
1025             }
1026             if (t.arch == Target::ARM) {
1027                 if (t.bits == 64) {
1028                     modules.push_back(get_initmod_aarch64_ll(c));
1029                 } else if (t.has_feature(Target::ARMv7s)) {
1030                     modules.push_back(get_initmod_arm_ll(c));
1031                 } else if (!t.has_feature(Target::NoNEON)) {
1032                     modules.push_back(get_initmod_arm_ll(c));
1033                 } else {
1034                     modules.push_back(get_initmod_arm_no_neon_ll(c));
1035                 }
1036             }
1037             if (t.arch == Target::MIPS) {
1038                 modules.push_back(get_initmod_mips_ll(c));
1039             }
1040             if (t.arch == Target::POWERPC) {
1041                 modules.push_back(get_initmod_powerpc_ll(c));
1042             }
1043             if (t.arch == Target::Hexagon) {
1044                 modules.push_back(get_initmod_qurt_hvx(c, bits_64, debug));
1045                 if (t.has_feature(Target::HVX_64)) {
1046                     modules.push_back(get_initmod_hvx_64_ll(c));
1047                 } else if (t.has_feature(Target::HVX_128)) {
1048                     modules.push_back(get_initmod_hvx_128_ll(c));
1049                 }
1050                 if (t.features_any_of({Target::HVX_v65, Target::HVX_v66})) {
1051                     modules.push_back(get_initmod_qurt_hvx_vtcm(c, bits_64,
1052                                                                 debug));
1053                 }
1054 
1055             } else {
1056                 modules.push_back(get_initmod_prefetch(c, bits_64, debug));
1057             }
1058             if (t.has_feature(Target::SSE41)) {
1059                 modules.push_back(get_initmod_x86_sse41_ll(c));
1060             }
1061             if (t.has_feature(Target::AVX)) {
1062                 modules.push_back(get_initmod_x86_avx_ll(c));
1063             }
1064             if (t.has_feature(Target::AVX2)) {
1065                 modules.push_back(get_initmod_x86_avx2_ll(c));
1066             }
1067             if (t.has_feature(Target::Profile)) {
1068                 user_assert(t.os != Target::WebAssemblyRuntime) << "The profiler cannot be used in a threadless environment.";
1069                 modules.push_back(get_initmod_profiler_inlined(c, bits_64, debug));
1070             }
1071             if (t.arch == Target::WebAssembly) {
1072                 modules.push_back(get_initmod_wasm_math_ll(c));
1073             }
1074         }
1075 
1076         if (module_type == ModuleAOT) {
1077             // These modules are only used for AOT compilation
1078             modules.push_back(get_initmod_can_use_target(c, bits_64, debug));
1079             if (t.arch == Target::X86) {
1080                 modules.push_back(get_initmod_x86_cpu_features(c, bits_64, debug));
1081             }
1082             if (t.arch == Target::ARM) {
1083                 if (t.bits == 64) {
1084                     modules.push_back(get_initmod_aarch64_cpu_features(c, bits_64, debug));
1085                 } else {
1086                     modules.push_back(get_initmod_arm_cpu_features(c, bits_64, debug));
1087                 }
1088             }
1089             if (t.arch == Target::MIPS) {
1090                 modules.push_back(get_initmod_mips_cpu_features(c, bits_64, debug));
1091             }
1092             if (t.arch == Target::POWERPC) {
1093                 modules.push_back(get_initmod_powerpc_cpu_features(c, bits_64, debug));
1094             }
1095             if (t.arch == Target::Hexagon) {
1096                 modules.push_back(get_initmod_hexagon_cpu_features(c, bits_64, debug));
1097             }
1098             if (t.arch == Target::RISCV) {
1099                 modules.push_back(get_initmod_riscv_cpu_features(c, bits_64, debug));
1100             }
1101             if (t.arch == Target::WebAssembly) {
1102                 modules.push_back(get_initmod_wasm_cpu_features(c, bits_64, debug));
1103             }
1104         }
1105     }
1106 
1107     if (module_type == ModuleJITShared || module_type == ModuleGPU) {
1108         modules.push_back(get_initmod_module_jit_ref_count(c, bits_64, debug));
1109     } else if (module_type == ModuleAOT) {
1110         modules.push_back(get_initmod_module_aot_ref_count(c, bits_64, debug));
1111     }
1112 
1113     if (module_type == ModuleAOT || module_type == ModuleGPU) {
1114         if (t.has_feature(Target::CUDA)) {
1115             if (t.os == Target::Windows) {
1116                 modules.push_back(get_initmod_windows_cuda(c, bits_64, debug));
1117             } else {
1118                 modules.push_back(get_initmod_cuda(c, bits_64, debug));
1119             }
1120         }
1121         if (t.has_feature(Target::OpenCL)) {
1122             if (t.os == Target::Windows) {
1123                 modules.push_back(get_initmod_windows_opencl(c, bits_64, debug));
1124             } else {
1125                 modules.push_back(get_initmod_opencl(c, bits_64, debug));
1126             }
1127         }
1128         if (t.has_feature(Target::OpenGL)) {
1129             modules.push_back(get_initmod_opengl(c, bits_64, debug));
1130             if (t.os == Target::Linux) {
1131                 if (t.has_feature(Target::EGL)) {
1132                     modules.push_back(get_initmod_opengl_egl_context(c, bits_64, debug));
1133                 } else {
1134                     modules.push_back(get_initmod_opengl_glx_context(c, bits_64, debug));
1135                 }
1136             } else if (t.os == Target::OSX) {
1137                 modules.push_back(get_initmod_osx_opengl_context(c, bits_64, debug));
1138             } else if (t.os == Target::Android) {
1139                 modules.push_back(get_initmod_opengl_egl_context(c, bits_64, debug));
1140             } else {
1141                 // You're on your own to provide definitions of halide_opengl_get_proc_address and halide_opengl_create_context
1142             }
1143         }
1144         if (t.has_feature(Target::OpenGLCompute)) {
1145             modules.push_back(get_initmod_openglcompute(c, bits_64, debug));
1146             if (t.os == Target::Android) {
1147                 // Only platform that supports OpenGL Compute for now.
1148                 modules.push_back(get_initmod_opengl_egl_context(c, bits_64, debug));
1149             } else if (t.os == Target::Linux) {
1150                 if (t.has_feature(Target::EGL)) {
1151                     modules.push_back(get_initmod_opengl_egl_context(c, bits_64, debug));
1152                 } else {
1153                     modules.push_back(get_initmod_opengl_glx_context(c, bits_64, debug));
1154                 }
1155             } else if (t.os == Target::OSX) {
1156                 modules.push_back(get_initmod_osx_opengl_context(c, bits_64, debug));
1157             } else {
1158                 // You're on your own to provide definitions of halide_opengl_get_proc_address and halide_opengl_create_context
1159             }
1160         }
1161         if (t.has_feature(Target::Metal)) {
1162             modules.push_back(get_initmod_metal(c, bits_64, debug));
1163             if (t.arch == Target::ARM) {
1164                 modules.push_back(get_initmod_metal_objc_arm(c, bits_64, debug));
1165             } else if (t.arch == Target::X86) {
1166                 modules.push_back(get_initmod_metal_objc_x86(c, bits_64, debug));
1167             } else {
1168                 user_error << "Metal can only be used on ARM or X86 architectures.\n";
1169             }
1170         }
1171         if (t.has_feature(Target::D3D12Compute)) {
1172             user_assert(bits_64) << "D3D12Compute target only available on 64-bit targets for now.\n";
1173             user_assert(t.os == Target::Windows) << "D3D12Compute target only available on Windows targets.\n";
1174             modules.push_back(get_initmod_windows_d3d12compute_x86(c, bits_64, debug));
1175         }
1176         if (t.arch != Target::Hexagon && t.features_any_of({Target::HVX_64, Target::HVX_128})) {
1177             modules.push_back(get_initmod_module_jit_ref_count(c, bits_64, debug));
1178             modules.push_back(get_initmod_hexagon_host(c, bits_64, debug));
1179         }
1180         if (t.has_feature(Target::HexagonDma)) {
1181             modules.push_back(get_initmod_hexagon_cache_allocator(c, bits_64, debug));
1182             modules.push_back(get_initmod_hexagon_dma(c, bits_64, debug));
1183             modules.push_back(get_initmod_hexagon_dma_pool(c, bits_64, debug));
1184         }
1185     }
1186 
1187     if (module_type == ModuleAOT && t.has_feature(Target::Matlab)) {
1188         modules.push_back(get_initmod_matlab(c, bits_64, debug));
1189     }
1190 
1191     if (module_type == ModuleAOTNoRuntime ||
1192         module_type == ModuleJITInlined ||
1193         t.os == Target::NoOS) {
1194         modules.push_back(get_initmod_runtime_api(c, bits_64, debug));
1195     }
1196 
1197     link_modules(modules, t);
1198 
1199     if (t.os == Target::Windows &&
1200         t.bits == 32 &&
1201         (t.has_feature(Target::JIT))) {
1202         undo_win32_name_mangling(modules[0].get());
1203     }
1204 
1205     if (t.os == Target::Windows) {
1206         add_underscores_to_posix_calls_on_windows(modules[0].get());
1207     }
1208 
1209     return std::move(modules[0]);
1210 }
1211 
1212 #ifdef WITH_NVPTX
get_initial_module_for_ptx_device(Target target,llvm::LLVMContext * c)1213 std::unique_ptr<llvm::Module> get_initial_module_for_ptx_device(Target target, llvm::LLVMContext *c) {
1214     std::vector<std::unique_ptr<llvm::Module>> modules;
1215     modules.push_back(get_initmod_ptx_dev_ll(c));
1216 
1217     std::unique_ptr<llvm::Module> module;
1218 
1219     // This table is based on the guidance at:
1220     // http://docs.nvidia.com/cuda/libdevice-users-guide/basic-usage.html#linking-with-libdevice
1221     if (target.has_feature(Target::CUDACapability35)) {
1222         module = get_initmod_ptx_compute_35_ll(c);
1223     } else if (target.features_any_of({Target::CUDACapability32,
1224                                        Target::CUDACapability50})) {
1225         // For some reason sm_32 and sm_50 use libdevice 20
1226         module = get_initmod_ptx_compute_20_ll(c);
1227     } else if (target.has_feature(Target::CUDACapability30)) {
1228         module = get_initmod_ptx_compute_30_ll(c);
1229     } else {
1230         module = get_initmod_ptx_compute_20_ll(c);
1231     }
1232     modules.push_back(std::move(module));
1233 
1234     link_modules(modules, target);
1235 
1236     // For now, the PTX backend does not handle calling functions. So mark all functions
1237     // AvailableExternally to ensure they are inlined or deleted.
1238     for (llvm::Module::iterator iter = modules[0]->begin(); iter != modules[0]->end(); iter++) {
1239         llvm::Function &f = *iter;
1240 
1241         // This is intended to set all definitions (not extern declarations)
1242         // to "available externally" which should guarantee they do not exist
1243         // after the resulting module is finalized to code. That is they must
1244         // be inlined to be used.
1245         //
1246         // However libdevice has a few routines that are marked
1247         // "noinline" which must either be changed to alow inlining or
1248         // preserved in generated code. This preserves the intent of
1249         // keeping these routines out-of-line and hence called by
1250         // not marking them AvailableExternally.
1251 
1252         if (!f.isDeclaration() && !f.hasFnAttribute(llvm::Attribute::NoInline)) {
1253             f.setLinkage(llvm::GlobalValue::AvailableExternallyLinkage);
1254         }
1255     }
1256 
1257     llvm::Triple triple("nvptx64--");
1258     modules[0]->setTargetTriple(triple.str());
1259 
1260     llvm::DataLayout dl("e-i64:64-v16:16-v32:32-n16:32:64");
1261     modules[0]->setDataLayout(dl);
1262 
1263     return std::move(modules[0]);
1264 }
1265 #endif
1266 
add_bitcode_to_module(llvm::LLVMContext * context,llvm::Module & module,const std::vector<uint8_t> & bitcode,const std::string & name)1267 void add_bitcode_to_module(llvm::LLVMContext *context, llvm::Module &module,
1268                            const std::vector<uint8_t> &bitcode, const std::string &name) {
1269     llvm::StringRef sb = llvm::StringRef((const char *)&bitcode[0], bitcode.size());
1270     std::unique_ptr<llvm::Module> add_in = parse_bitcode_file(sb, context, name.c_str());
1271 
1272     bool failed = llvm::Linker::linkModules(module, std::move(add_in));
1273     if (failed) {
1274         internal_error << "Failure linking in additional module: " << name << "\n";
1275     }
1276 }
1277 
1278 }  // namespace Internal
1279 }  // namespace Halide
1280