1 #include <iostream>
2 #include <memory>
3 
4 #include "Closure.h"
5 #include "Elf.h"
6 #include "HexagonOffload.h"
7 #include "IRMutator.h"
8 #include "IROperator.h"
9 #include "InjectHostDevBufferCopies.h"
10 #include "LLVM_Headers.h"
11 #include "LLVM_Output.h"
12 #include "Param.h"
13 #include "Substitute.h"
14 
15 namespace Halide {
16 namespace Internal {
17 
18 using std::string;
19 using std::vector;
20 
21 namespace Elf {
22 
// Most of these constants were duplicated from LLVM's object parser code.

// ELF version value; returned by HexagonLinker::get_version().
enum {
    EV_CURRENT = 1,
};

// ELF machine identifier for Hexagon; returned by HexagonLinker::get_machine().
enum {
    EM_HEXAGON = 164,
};

// Hexagon architecture-version flag values. HexagonLinker picks one of these
// from the target's HVX features and reports it via get_flags().
// http://llvm.org/docs/doxygen/html/Support_2ELF_8h_source.html#l00558
enum {
    EF_HEXAGON_MACH_V2 = 0x1,
    EF_HEXAGON_MACH_V3 = 0x2,
    EF_HEXAGON_MACH_V4 = 0x3,
    EF_HEXAGON_MACH_V5 = 0x4,
    EF_HEXAGON_MACH_V55 = 0x5,
    EF_HEXAGON_MACH_V60 = 0x60,  // Deprecated
    EF_HEXAGON_MACH_V61 = 0x61,  // Deprecated?
    EF_HEXAGON_MACH_V62 = 0x62,
    EF_HEXAGON_MACH_V65 = 0x65,
    EF_HEXAGON_MACH_V66 = 0x66,
};

// Hexagon-specific dynamic section tag; written by
// HexagonLinker::append_dynamic() followed by the value 0x3.
enum {
    DT_HEXAGON_VER = 0x70000001,
};
49 
// Hexagon ELF relocation type numbers. The values are part of the object
// file format and must match the table they were copied from:
// https://llvm.org/svn/llvm-project/llvm/trunk/include/llvm/Support/ELFRelocs/Hexagon.def
enum {
    R_HEX_NONE = 0,
    R_HEX_B22_PCREL = 1,
    R_HEX_B15_PCREL = 2,
    R_HEX_B7_PCREL = 3,
    R_HEX_LO16 = 4,
    R_HEX_HI16 = 5,
    R_HEX_32 = 6,
    R_HEX_16 = 7,
    R_HEX_8 = 8,
    R_HEX_GPREL16_0 = 9,
    R_HEX_GPREL16_1 = 10,
    R_HEX_GPREL16_2 = 11,
    R_HEX_GPREL16_3 = 12,
    R_HEX_HL16 = 13,
    R_HEX_B13_PCREL = 14,
    R_HEX_B9_PCREL = 15,
    R_HEX_B32_PCREL_X = 16,
    R_HEX_32_6_X = 17,
    R_HEX_B22_PCREL_X = 18,
    R_HEX_B15_PCREL_X = 19,
    R_HEX_B13_PCREL_X = 20,
    R_HEX_B9_PCREL_X = 21,
    R_HEX_B7_PCREL_X = 22,
    R_HEX_16_X = 23,
    R_HEX_12_X = 24,
    R_HEX_11_X = 25,
    R_HEX_10_X = 26,
    R_HEX_9_X = 27,
    R_HEX_8_X = 28,
    R_HEX_7_X = 29,
    R_HEX_6_X = 30,
    R_HEX_32_PCREL = 31,
    R_HEX_COPY = 32,
    R_HEX_GLOB_DAT = 33,
    R_HEX_JMP_SLOT = 34,
    R_HEX_RELATIVE = 35,
    R_HEX_PLT_B22_PCREL = 36,
    R_HEX_GOTREL_LO16 = 37,
    R_HEX_GOTREL_HI16 = 38,
    R_HEX_GOTREL_32 = 39,
    R_HEX_GOT_LO16 = 40,
    R_HEX_GOT_HI16 = 41,
    R_HEX_GOT_32 = 42,
    R_HEX_GOT_16 = 43,
    R_HEX_DTPMOD_32 = 44,
    // Value 45 was missing from this table, leaving a hole between
    // DTPMOD_32 and DTPREL_HI16.
    R_HEX_DTPREL_LO16 = 45,
    R_HEX_DTPREL_HI16 = 46,
    R_HEX_DTPREL_32 = 47,
    R_HEX_DTPREL_16 = 48,
    R_HEX_GD_PLT_B22_PCREL = 49,
    R_HEX_GD_GOT_LO16 = 50,
    R_HEX_GD_GOT_HI16 = 51,
    R_HEX_GD_GOT_32 = 52,
    R_HEX_GD_GOT_16 = 53,
    R_HEX_IE_LO16 = 54,
    R_HEX_IE_HI16 = 55,
    R_HEX_IE_32 = 56,
    R_HEX_IE_GOT_LO16 = 57,
    R_HEX_IE_GOT_HI16 = 58,
    R_HEX_IE_GOT_32 = 59,
    R_HEX_IE_GOT_16 = 60,
    R_HEX_TPREL_LO16 = 61,
    R_HEX_TPREL_HI16 = 62,
    R_HEX_TPREL_32 = 63,
    R_HEX_TPREL_16 = 64,
    R_HEX_6_PCREL_X = 65,
    R_HEX_GOTREL_32_6_X = 66,
    R_HEX_GOTREL_16_X = 67,
    R_HEX_GOTREL_11_X = 68,
    R_HEX_GOT_32_6_X = 69,
    R_HEX_GOT_16_X = 70,
    R_HEX_GOT_11_X = 71,
    R_HEX_DTPREL_32_6_X = 72,
    R_HEX_DTPREL_16_X = 73,
    R_HEX_DTPREL_11_X = 74,
    R_HEX_GD_GOT_32_6_X = 75,
    R_HEX_GD_GOT_16_X = 76,
    R_HEX_GD_GOT_11_X = 77,
    R_HEX_IE_32_6_X = 78,
    R_HEX_IE_16_X = 79,
    R_HEX_IE_GOT_32_6_X = 80,
    R_HEX_IE_GOT_16_X = 81,
    R_HEX_IE_GOT_11_X = 82,
    R_HEX_TPREL_32_6_X = 83,
    R_HEX_TPREL_16_X = 84,
    R_HEX_TPREL_11_X = 85,
    R_HEX_LD_PLT_B22_PCREL = 86,
    R_HEX_LD_GOT_LO16 = 87,
    R_HEX_LD_GOT_HI16 = 88,
    R_HEX_LD_GOT_32 = 89,
    R_HEX_LD_GOT_16 = 90,
    R_HEX_LD_GOT_32_6_X = 91,
    R_HEX_LD_GOT_16_X = 92,
    R_HEX_LD_GOT_11_X = 93,
};
146 
147 // This logic comes from support from Qualcomm.
maybe_branch_inst(uint32_t reloc_type)148 bool maybe_branch_inst(uint32_t reloc_type) {
149     switch (reloc_type) {
150     case R_HEX_PLT_B22_PCREL:
151     case R_HEX_B22_PCREL:
152     case R_HEX_B22_PCREL_X:
153     case R_HEX_B15_PCREL:
154     case R_HEX_B15_PCREL_X:
155     case R_HEX_B13_PCREL:
156     case R_HEX_B13_PCREL_X:
157     case R_HEX_B9_PCREL:
158     case R_HEX_B9_PCREL_X:
159     case R_HEX_B7_PCREL:
160     case R_HEX_B7_PCREL_X:
161     case R_HEX_B32_PCREL_X:
162     case R_HEX_32_PCREL:
163     case R_HEX_6_PCREL_X:
164 
165     case R_HEX_LO16:
166     case R_HEX_HI16:
167     case R_HEX_16:
168     case R_HEX_8:
169     case R_HEX_32_6_X:
170     case R_HEX_16_X:
171     case R_HEX_12_X:
172     case R_HEX_11_X:
173     case R_HEX_10_X:
174     case R_HEX_9_X:
175     case R_HEX_8_X:
176     case R_HEX_7_X:
177     case R_HEX_6_X:
178     case R_HEX_32:
179         return true;
180     default:
181         return false;
182     }
183 }
184 
// Format a 32-bit value as "0x" followed by exactly eight lowercase,
// zero-padded hexadecimal digits.
std::string hex(uint32_t x) {
    // "0x" + 8 digits + terminating NUL needs 11 bytes; leave some slack.
    char text[16];
    snprintf(text, sizeof(text), "0x%08x", x);
    return std::string(text);
}
190 
section_type_string(Section::Type type)191 std::string section_type_string(Section::Type type) {
192     switch (type) {
193     case Section::SHT_NULL:
194         return "SHT_NULL";
195     case Section::SHT_PROGBITS:
196         return "SHT_PROGBITS";
197     case Section::SHT_SYMTAB:
198         return "SHT_SYMTAB";
199     case Section::SHT_STRTAB:
200         return "SHT_STRTAB";
201     case Section::SHT_RELA:
202         return "SHT_RELA";
203     case Section::SHT_HASH:
204         return "SHT_HASH";
205     case Section::SHT_DYNAMIC:
206         return "SHT_DYNAMIC";
207     case Section::SHT_NOTE:
208         return "SHT_NOTE";
209     case Section::SHT_NOBITS:
210         return "SHT_NOBITS";
211     case Section::SHT_REL:
212         return "SHT_REL";
213     case Section::SHT_SHLIB:
214         return "SHT_SHLIB";
215     case Section::SHT_DYNSYM:
216         return "SHT_DYNSYM";
217     case Section::SHT_LOPROC:
218         return "SHT_LOPROC";
219     case Section::SHT_HIPROC:
220         return "SHT_HIPROC";
221     case Section::SHT_LOUSER:
222         return "SHT_LOUSER";
223     case Section::SHT_HIUSER:
224         return "SHT_HIUSER";
225     default:
226         return "UNKNOWN TYPE";
227     }
228 }
print_sections(const Object & obj)229 std::string print_sections(const Object &obj) {
230     std::ostringstream oss;
231     if (obj.sections_size() == 0) {
232         oss << "No sections in object\n";
233         return oss.str();
234     }
235     for (const Section &s : obj.sections()) {
236         oss << s.get_name() << ", Type = " << section_type_string(s.get_type()) << ", Size = " << hex(s.get_size()) << ", Alignment = " << s.get_alignment() << "\n";
237     }
238     return oss.str();
239 }
240 
do_reloc(char * addr,uint32_t mask,uintptr_t val,bool is_signed,bool verify)241 void do_reloc(char *addr, uint32_t mask, uintptr_t val, bool is_signed, bool verify) {
242     uint32_t inst = *((uint32_t *)addr);
243     debug(4) << "Relocation in instruction: " << hex(inst) << "\n";
244     debug(4) << "val: " << hex(val) << "\n";
245     debug(4) << "mask: " << hex(mask) << "\n";
246 
247     if (!mask) {
248         // The mask depends on the instruction. To implement
249         // relocations for new instructions see
250         // instruction_encodings.txt
251         // First print the bits so I can search for it in the
252         // instruction encodings.
253         debug(4) << "Instruction bits: ";
254         for (int i = 31; i >= 0; i--) {
255             debug(4) << (int)((inst >> i) & 1);
256         }
257         debug(4) << "\n";
258 
259         if ((inst & (3 << 14)) == 0) {
260             // Some instructions are actually pairs of 16-bit
261             // subinstructions. See section 3.7 in the
262             // programmer's reference.
263             debug(4) << "Duplex!\n";
264 
265             int iclass = ((inst >> 29) << 1) | ((inst >> 13) & 1);
266             debug(4) << "Class: " << hex(iclass) << "\n";
267             debug(4) << "Hi: ";
268             for (int i = 28; i >= 16; i--) {
269                 debug(4) << (int)((inst >> i) & 1);
270             }
271             debug(4) << "\n";
272             debug(4) << "Lo: ";
273             for (int i = 12; i >= 0; i--) {
274                 debug(4) << (int)((inst >> i) & 1);
275             }
276             debug(4) << "\n";
277 
278             // We only know how to do the ones where the high
279             // subinstruction is an immediate assignment. (marked
280             // as A in table 9-4 in the programmer's reference
281             // manual).
282             internal_assert(iclass >= 3 && iclass <= 7);
283 
284             // Pull out the subinstructions. They're the low 13
285             // bits of each half-word.
286             uint32_t hi = (inst >> 16) & ((1 << 13) - 1);
287             //uint32_t lo = inst & ((1 << 13) - 1);
288 
289             // We only understand the ones where hi starts with 010
290             internal_assert((hi >> 10) == 2);
291 
292             // Low 6 bits of val go in the following bits.
293             mask = 63 << 20;
294 
295         } else if ((inst >> 24) == 72) {
296             // Example instruction encoding that has this high byte (ignoring bits 1 and 2):
297             // 0100 1ii0  000i iiii  PPit tttt  iiii iiii
298             debug(4) << "Instruction-specific case A\n";
299             mask = 0x061f20ff;
300         } else if ((inst >> 24) == 73) {
301             // 0100 1ii1  000i iiii  PPii iiii  iiid dddd
302             debug(4) << "Instruction-specific case B\n";
303             mask = 0x061f3fe0;
304         } else if ((inst >> 24) == 120) {
305             // 0111 1000  ii-i iiii  PPii iiii  iiid dddd
306             debug(4) << "Instruction-specific case C\n";
307             mask = 0x00df3fe0;
308         } else if ((inst >> 16) == 27209) {
309             // 0110 1010  0100 1001  PP-i iiii  i--d dddd
310             mask = 0x00001f80;
311         } else if ((inst >> 25) == 72) {
312             // 1001 0ii0  101s ssss  PPii iiii  iiid dddd
313             // 1001 0ii1  000s ssss  PPii iiii  iiid dddd
314             mask = 0x06003fe0;
315         } else if ((inst >> 24) == 115 || (inst >> 24) == 124) {
316             // 0111 0011 -10sssss PP1iiiii iiiddddd
317             // 0111 0011 -11sssss PP1iiiii iiiddddd
318             // 0111 0011 0uusssss PP0iiiii iiiddddd
319             // 0111 0011 1uusssss PP0iiiii iiiddddd
320             // 0111 0011 -00sssss PP1iiiii iiiddddd
321             // 0111 0011 -01sssss PP1iiiii iiiddddd
322             // 0111 1100 0IIIIIII PPIiiiii iiiddddd
323             // 0111 0011 -11sssss PP1iiiii iiiddddd
324             mask = 0x00001fe0;
325 
326         } else if ((inst >> 24) == 126) {
327             // 0111 1110 0uu0 iiii PP0i iiii iiid dddd
328             // 0111 1110 0uu0 iiii PP1i iiii iiid dddd
329             // 0111 1110 0uu1 iiii PP0i iiii iiid dddd
330             // 0111 1110 0uu1 iiii PP1i iiii iiid dddd
331             mask = 0x000f1fe0;
332         } else if ((inst >> 24) == 65 || (inst >> 24) == 77) {
333             // 0100 0001 000s ssss PP0t tiii iiid dddd
334             // 0100 0001 001s ssss PP0t tiii iiid dddd
335             // 0100 0001 010s ssss PP0t tiii iiid dddd
336             // 0100 0001 011s ssss PP0t tiii iiid dddd
337             // 0100 0001 100s ssss PP0t tiii iiid dddd
338             // 0100 0001 110s ssss PP0t tiii iiid dddd
339             // TODO: Add instructions to comment for mask 77.
340             mask = 0x000007e0;
341         } else if ((inst >> 21) == 540) {
342             // 0100 0011 100s ssss PP0t tiii iiid dddd
343             mask = 0x000007e0;
344         } else if ((inst >> 28) == 11) {
345             // 1011 iiii iiis ssss PPii iiii iiid dddd
346             mask = 0x0fe03fe0;
347         } else {
348             internal_error << "Unhandled instruction type! Instruction = " << inst << "\n";
349         }
350     }
351 
352     uintptr_t old_val = val;
353     bool consumed_every_bit = false;
354     for (int i = 0; i < 32; i++) {
355         if (mask & (1 << i)) {
356             internal_assert((inst & (1 << i)) == 0);
357 
358             // Consume a bit of val
359             int next_bit = val & 1;
360             if (is_signed) {
361                 consumed_every_bit |= ((intptr_t)val) == -1;
362                 val = ((intptr_t)val) >> 1;
363             } else {
364                 val = ((uintptr_t)val) >> 1;
365             }
366             consumed_every_bit |= (val == 0);
367             inst |= (next_bit << i);
368         }
369     }
370 
371     internal_assert(!verify || consumed_every_bit)
372         << "Relocation overflow inst=" << hex(inst)
373         << "mask=" << hex(mask) << " val=" << hex(old_val) << "\n";
374 
375     debug(4) << "Relocated instruction: " << hex(inst) << "\n";
376 
377     *((uint32_t *)addr) = inst;
378 }
379 
// Apply one Hexagon relocation of the given `type` to the instruction or data
// word at `fixup_addr`, delegating the bit manipulation to do_reloc().
//
//   fixup_offset - offset of the location being patched (P in the formulas).
//   fixup_addr   - pointer to the bytes being patched.
//   type         - one of the R_HEX_* relocation types.
//   sym          - symbol the relocation refers to; used to find or create a
//                  GOT entry and for debug output.
//   sym_offset   - offset of the symbol (S in the formulas).
//   addend       - relocation addend (A in the formulas).
//   got          - GOT section; a new 32-bit entry with an R_HEX_GLOB_DAT
//                  relocation is appended when a GOT-based relocation is
//                  applied and no relocation for `sym` exists there yet.
//
// Relocation types not listed in the switch trigger internal_error.
void do_relocation(uint32_t fixup_offset, char *fixup_addr, uint32_t type,
                   const Symbol *sym, uint32_t sym_offset, int32_t addend,
                   Elf::Section &got) {
    // Hexagon relocations are specified in section 11.5 in
    // the Hexagon Application Binary Interface spec.

    // Now we can define the variables from Table 11-5.
    uint32_t S = sym_offset;
    uint32_t P = fixup_offset;
    intptr_t A = addend;
    uint32_t GP = 0;

    // G is the GOT offset for this symbol. Start with the offset a freshly
    // appended entry would get, then prefer an existing GOT relocation that
    // already refers to the same symbol.
    // NOTE(review): unlike HexagonLinker::get_got_entry(), this lookup does
    // not check the relocation type of the existing entry - confirm intended.
    uint32_t G = got.contents_size();
    for (const Relocation &r : got.relocations()) {
        if (r.get_symbol() == sym) {
            G = r.get_offset();
            debug(2) << "Reusing G=" << G << " for symbol " << sym->get_name() << "\n";
            break;
        }
    }

    // Define some constants from table 11-3
    const uint32_t Word32 = 0xffffffff;
    const uint32_t Word16 = 0xffff;
    const uint32_t Word8 = 0xff;
    const uint32_t Word32_B22 = 0x01ff3ffe;
    const uint32_t Word32_B15 = 0x00df20fe;
    const uint32_t Word32_B13 = 0x00202ffe;
    const uint32_t Word32_B9 = 0x003000fe;
    const uint32_t Word32_B7 = 0x00001f18;
    const uint32_t Word32_GP = 0;  // The mask is instruction-specific
    const uint32_t Word32_X26 = 0x0fff3fff;
    const uint32_t Word32_U6 = 0;  // The mask is instruction-specific
    const uint32_t Word32_R6 = 0x000007e0;
    const uint32_t Word32_LO = 0x00c03fff;
    // Readable names for do_reloc's two boolean arguments.
    const bool truncate = false, verify = true;
    const bool _unsigned = false, _signed = true;

    // Set by the GOT-based cases below; the entry itself is created after
    // the switch.
    bool needs_got_entry = false;

    // NOTE(review): several cases raise internal_error ("Not pic code") before
    // calling do_reloc; presumably internal_error does not return, making the
    // following do_reloc call unreachable - confirm.
    switch (type) {
    case R_HEX_B22_PCREL:
        do_reloc(fixup_addr, Word32_B22, intptr_t(S + A - P) >> 2, _signed, verify);
        break;
    case R_HEX_B15_PCREL:
        // Untested
        do_reloc(fixup_addr, Word32_B15, intptr_t(S + A - P) >> 2, _signed, verify);
        break;
    case R_HEX_B7_PCREL:
        do_reloc(fixup_addr, Word32_B7, intptr_t(S + A - P) >> 2, _signed, verify);
        break;
    case R_HEX_LO16:
        internal_error << "Not pic code " << type << "\n";
        do_reloc(fixup_addr, Word32_LO, uintptr_t(S + A), _unsigned, truncate);
        break;
    case R_HEX_HI16:
        internal_error << "Not pic code " << type << "\n";
        do_reloc(fixup_addr, Word32_LO, uintptr_t(S + A) >> 16, _unsigned, truncate);
        break;
    case R_HEX_32:
        internal_error << "Not pic code " << type << "\n";
        do_reloc(fixup_addr, Word32, intptr_t(S + A), _unsigned, truncate);
        break;
    case R_HEX_16:
        internal_error << "Not pic code " << type << "\n";
        do_reloc(fixup_addr, Word16, uintptr_t(S + A), _unsigned, truncate);
        break;
    case R_HEX_8:
        internal_error << "Not pic code " << type << "\n";
        do_reloc(fixup_addr, Word8, uintptr_t(S + A), _unsigned, truncate);
        break;
    case R_HEX_GPREL16_0:
        internal_error << "Not pic code " << type << "\n";
        do_reloc(fixup_addr, Word32_GP, uintptr_t(S + A - GP), _unsigned, verify);
        break;
    case R_HEX_GPREL16_1:
        internal_error << "Not pic code " << type << "\n";
        do_reloc(fixup_addr, Word32_GP, uintptr_t(S + A - GP) >> 1, _unsigned, verify);
        break;
    case R_HEX_GPREL16_2:
        internal_error << "Not pic code " << type << "\n";
        do_reloc(fixup_addr, Word32_GP, uintptr_t(S + A - GP) >> 2, _unsigned, verify);
        break;
    case R_HEX_GPREL16_3:
        internal_error << "Not pic code " << type << "\n";
        do_reloc(fixup_addr, Word32_GP, uintptr_t(S + A - GP) >> 3, _unsigned, verify);
        break;
    case R_HEX_HL16:
        // Patches two consecutive instruction words: high half first, then
        // the low half in the word that follows.
        internal_error << "Not pic code " << type << "\n";
        do_reloc(fixup_addr, Word32_LO, uintptr_t(S + A) >> 16, _unsigned, truncate);
        do_reloc(fixup_addr + 4, Word32_LO, uintptr_t(S + A), _unsigned, truncate);
        break;
    case R_HEX_B13_PCREL:
        do_reloc(fixup_addr, Word32_B13, intptr_t(S + A - P) >> 2, _signed, verify);
        break;
    case R_HEX_B9_PCREL:
        do_reloc(fixup_addr, Word32_B9, intptr_t(S + A - P) >> 2, _signed, verify);
        break;
    case R_HEX_B32_PCREL_X:
        // Everything above the low 6 bits of the PC-relative offset.
        do_reloc(fixup_addr, Word32_X26, intptr_t(S + A - P) >> 6, _signed, truncate);
        break;
    case R_HEX_32_6_X:
        internal_error << "Not pic code " << type << "\n";
        do_reloc(fixup_addr, Word32_X26, uintptr_t(S + A) >> 6, _unsigned, verify);
        break;
    // The *_PCREL_X cases below insert only the low 6 bits of the offset.
    case R_HEX_B22_PCREL_X:
        do_reloc(fixup_addr, Word32_B22, intptr_t(S + A - P) & 0x3f, _signed, verify);
        break;
    case R_HEX_B15_PCREL_X:
        do_reloc(fixup_addr, Word32_B15, intptr_t(S + A - P) & 0x3f, _signed, verify);
        break;
    case R_HEX_B13_PCREL_X:
        do_reloc(fixup_addr, Word32_B13, intptr_t(S + A - P) & 0x3f, _signed, verify);
        break;
    case R_HEX_B9_PCREL_X:
        do_reloc(fixup_addr, Word32_B9, intptr_t(S + A - P) & 0x3f, _signed, verify);
        break;
    case R_HEX_B7_PCREL_X:
        do_reloc(fixup_addr, Word32_B7, intptr_t(S + A - P) & 0x3f, _signed, verify);
        break;
    case R_HEX_16_X:
        internal_error << "Not pic code " << type << "\n";
        do_reloc(fixup_addr, Word32_U6, uintptr_t(S + A), _unsigned, truncate);
        break;
    case R_HEX_12_X:
        internal_error << "Not pic code " << type << "\n";
        do_reloc(fixup_addr, Word32_R6, uintptr_t(S + A), _unsigned, truncate);
        break;
    case R_HEX_11_X:
    case R_HEX_10_X:
    case R_HEX_9_X:
    case R_HEX_8_X:
    case R_HEX_7_X:
    case R_HEX_6_X:
        internal_error << "Not pic code " << type << "\n";
        do_reloc(fixup_addr, Word32_U6, uintptr_t(S + A), _unsigned, truncate);
        break;
    case R_HEX_32_PCREL:
        do_reloc(fixup_addr, Word32, intptr_t(S + A - P), _signed, verify);
        break;
    case R_HEX_6_PCREL_X:
        do_reloc(fixup_addr, Word32_U6, uintptr_t(S + A - P), _unsigned, truncate);
        break;
    // GOT-based relocations: patch in the GOT offset and make sure the GOT
    // entry exists (created after the switch if necessary).
    case R_HEX_GOT_32_6_X:
        do_reloc(fixup_addr, Word32_X26, intptr_t(G) >> 6, _signed, truncate);
        needs_got_entry = true;
        break;
    case R_HEX_GOT_16_X:
        do_reloc(fixup_addr, Word32_U6, intptr_t(G), _signed, truncate);
        needs_got_entry = true;
        break;
    case R_HEX_GOT_11_X:
        do_reloc(fixup_addr, Word32_U6, uintptr_t(G), _unsigned, truncate);
        needs_got_entry = true;
        break;

    default:
        internal_error << "Unhandled relocation type " << type << "\n";
    }

    // G still equal to the GOT size means no existing entry was found above,
    // so append a zeroed slot and a relocation that refers to sym.
    if (needs_got_entry && G == got.contents_size()) {
        debug(2) << "Adding GOT entry " << G << " for symbol " << sym->get_name() << "\n";
        got.append_contents((uint32_t)0);
        got.add_relocation(Relocation(R_HEX_GLOB_DAT, G, 0, sym));
    }
}
546 
547 class HexagonLinker : public Linker {
548 public:
549     uint32_t flags;
550 
HexagonLinker(const Target & target)551     HexagonLinker(const Target &target) {
552         if (target.has_feature(Target::HVX_v66)) {
553             flags = Elf::EF_HEXAGON_MACH_V66;
554         } else if (target.has_feature(Target::HVX_v65)) {
555             flags = Elf::EF_HEXAGON_MACH_V65;
556         } else {
557             flags = Elf::EF_HEXAGON_MACH_V62;
558         }
559     }
560 
get_machine()561     uint16_t get_machine() override {
562         return EM_HEXAGON;
563     }
get_flags()564     uint32_t get_flags() override {
565         return flags;
566     }
get_version()567     uint32_t get_version() override {
568         return EV_CURRENT;
569     }
append_dynamic(Section & dynamic)570     void append_dynamic(Section &dynamic) override {
571         dynamic.append_contents((uint32_t)DT_HEXAGON_VER);
572         dynamic.append_contents((uint32_t)0x3);
573     }
574 
get_got_entry(Section & got,const Symbol & sym)575     uint64_t get_got_entry(Section &got, const Symbol &sym) override {
576         // Check if we already made a got entry for this symbol.
577         for (const Relocation &r : got.relocations()) {
578             if (r.get_symbol() == &sym && r.get_type() == R_HEX_GLOB_DAT) {
579                 internal_assert(r.get_addend() == 0);
580                 return r.get_offset();
581             }
582         }
583 
584         uint64_t got_offset = got.contents_size();
585         got.append_contents((uint32_t)0);
586         got.add_relocation(Elf::Relocation(R_HEX_GLOB_DAT, got_offset, 0, &sym));
587         return got_offset;
588     }
589 
needs_plt_entry(const Relocation & r)590     bool needs_plt_entry(const Relocation &r) override {
591         return maybe_branch_inst(r.get_type());
592     }
593 
add_plt_entry(const Symbol & sym,Section & plt,Section & got,const Symbol & got_sym)594     Symbol add_plt_entry(const Symbol &sym, Section &plt, Section &got, const Symbol &got_sym) override {
595         if (got.contents_empty()) {
596             // The PLT hasn't been started, initialize it now.
597             plt.set_alignment(16);
598 
599             std::vector<char> padding(64, (char)0);
600             // TODO: Make a .plt0 entry that supports lazy binding.
601             plt.set_contents(padding.begin(), padding.end());
602         }
603 
604         static const uint8_t hexagon_plt1[] = {
605             0x00, 0x40, 0x00, 0x00,  // { immext (#0) (Relocation:R_HEX_B32_PCREL_X)
606             0x0e, 0xc0, 0x49, 0x6a,  //   r14 = add (pc, ##GOTn@PCREL) }  (Relocation:R_HEX_6_PCREL_X)
607             0x1c, 0xc0, 0x8e, 0x91,  //   r28 = memw (r14)
608             0x00, 0xc0, 0x9c, 0x52,  //   jumpr r28
609         };
610 
611         debug(2) << "Adding PLT entry for symbol " << sym.get_name() << "\n";
612 
613         // Add a GOT entry for this symbol.
614         uint64_t got_offset = got.contents_size();
615         got.append_contents((uint32_t)0);
616         got.add_relocation(Elf::Relocation(R_HEX_JMP_SLOT, got_offset, 0, &sym));
617 
618         // Add the PLT code.
619         uint32_t plt_offset = plt.get_size();
620         plt.append_contents(hexagon_plt1, hexagon_plt1 + sizeof(hexagon_plt1));
621 
622         plt.add_relocation(Relocation(R_HEX_B32_PCREL_X, plt_offset + 0, got_offset, &got_sym));
623         plt.add_relocation(Relocation(R_HEX_6_PCREL_X, plt_offset + 4, got_offset + 4, &got_sym));
624 
625         // Make a symbol for the PLT entry.
626         Symbol plt_sym("plt_" + sym.get_name());
627         plt_sym
628             .set_type(Symbol::STT_FUNC)
629             .set_binding(Symbol::STB_LOCAL)
630             .define(&plt, plt_offset, sizeof(hexagon_plt1));
631 
632         return plt_sym;
633     }
634 
relocate(uint64_t fixup_offset,char * fixup_addr,uint64_t type,const Elf::Symbol * sym,uint64_t sym_offset,int64_t addend,Elf::Section & got)635     Relocation relocate(uint64_t fixup_offset, char *fixup_addr, uint64_t type,
636                         const Elf::Symbol *sym, uint64_t sym_offset, int64_t addend,
637                         Elf::Section &got) override {
638         if (type == R_HEX_32) {
639             // Don't do this relocation, generate a new R_HEX_RELATIVE relocation instead.
640             return Relocation(R_HEX_RELATIVE, fixup_offset, sym_offset + addend, nullptr);
641         }
642         do_relocation(fixup_offset, fixup_addr, type, sym, sym_offset, addend, got);
643         return Relocation();
644     }
645 };
646 
647 }  // namespace Elf
648 
649 namespace {
650 
// Fixed module names for the shared runtime and for the generated pipeline
// code. NOTE(review): their uses are outside the portion of the file visible
// here - confirm against the code that references them.
const std::string runtime_module_name = "halide_shared_runtime";
const std::string pipeline_module_name = "halide_hexagon_code";
653 
654 // Replace the parameter objects of loads/stores with a new parameter
655 // object.
656 class ReplaceParams : public IRMutator {
657     const std::map<std::string, Parameter> &replacements;
658 
659     using IRMutator::visit;
660 
visit(const Load * op)661     Expr visit(const Load *op) override {
662         auto i = replacements.find(op->name);
663         if (i != replacements.end()) {
664             return Load::make(op->type, op->name, mutate(op->index), op->image,
665                               i->second, mutate(op->predicate), op->alignment);
666         } else {
667             return IRMutator::visit(op);
668         }
669     }
670 
visit(const Store * op)671     Stmt visit(const Store *op) override {
672         auto i = replacements.find(op->name);
673         if (i != replacements.end()) {
674             return Store::make(op->name, mutate(op->value), mutate(op->index),
675                                i->second, mutate(op->predicate), op->alignment);
676         } else {
677             return IRMutator::visit(op);
678         }
679     }
680 
681 public:
ReplaceParams(const std::map<std::string,Parameter> & replacements)682     ReplaceParams(const std::map<std::string, Parameter> &replacements)
683         : replacements(replacements) {
684     }
685 };
686 
// Convenience wrapper: run a ReplaceParams mutator built from `replacements`
// over `s` and return the rewritten statement.
Stmt replace_params(const Stmt &s, const std::map<std::string, Parameter> &replacements) {
    ReplaceParams replacer(replacements);
    return replacer.mutate(s);
}
690 
691 class InjectHexagonRpc : public IRMutator {
692     std::map<std::string, Expr> state_bufs;
693 
694     Module &device_code;
695 
state_var(const std::string & name,Type type)696     Expr state_var(const std::string &name, Type type) {
697         return Let::make(name, state_var_ptr(name, type),
698                          Load::make(type_of<void *>(), name, 0,
699                                     Buffer<>(), Parameter(), const_true(), ModulusRemainder()));
700     }
701 
state_var_ptr(const std::string & name,Type type)702     Expr state_var_ptr(const std::string &name, Type type) {
703         Expr &buf = state_bufs[name];
704         if (!buf.defined()) {
705             auto storage = Buffer<void *>::make_scalar(name + "_buf");
706             storage() = nullptr;
707             buf = Variable::make(type_of<halide_buffer_t *>(), storage.name() + ".buffer", storage);
708         }
709         return Call::make(Handle(), Call::buffer_get_host, {buf}, Call::Extern);
710     }
711 
module_state()712     Expr module_state() {
713         return state_var("hexagon_module_state", type_of<void *>());
714     }
715 
module_state_ptr()716     Expr module_state_ptr() {
717         return state_var_ptr("hexagon_module_state", type_of<void *>());
718     }
719 
720     // Create a Buffer containing the given buffer/size, and return an
721     // expression for a pointer to the first element.
buffer_ptr(const uint8_t * buffer,size_t size,const char * name)722     Expr buffer_ptr(const uint8_t *buffer, size_t size, const char *name) {
723         Buffer<uint8_t> code((int)size, name);
724         memcpy(code.data(), buffer, (int)size);
725         Expr buf = Variable::make(type_of<halide_buffer_t *>(), string(name) + ".buffer", code);
726         return Call::make(Handle(), Call::buffer_get_host, {buf}, Call::Extern);
727     }
728 
729     using IRMutator::visit;
730 
visit(const For * loop)731     Stmt visit(const For *loop) override {
732         if (loop->device_api != DeviceAPI::Hexagon) {
733             return IRMutator::visit(loop);
734         }
735 
736         // Unrolling or loop partitioning might generate multiple
737         // loops with the same name, so we need to make them unique.
738         // There's a bit of a hack here: the offload_rpc. prefix is
739         // significant, it tells the Hexagon code generator to expect
740         // the arguments to be unpacked by the Hexagon remote-side RPC
741         // call, which doesn't work with standard buffers.
742         std::string hex_name = unique_name("offload_rpc." + loop->name);
743 
744         // After moving this to Hexagon, it doesn't need to be marked
745         // Hexagon anymore.
746         Stmt body;
747         if (is_one(loop->extent)) {
748             body = LetStmt::make(loop->name, loop->min, loop->body);
749         } else {
750             body = For::make(loop->name, loop->min, loop->extent, loop->for_type,
751                              DeviceAPI::None, loop->body);
752         }
753 
754         // Build a closure for the device code.
755         // TODO: Should this move the body of the loop to Hexagon,
756         // or the loop itself? Currently, this moves the loop itself.
757         Closure c(body);
758 
759         // A buffer parameter potentially generates 3 scalar parameters (min,
760         // extent, stride) per dimension. Pipelines with many buffers may
761         // generate extreme numbers of scalar parameters, which can cause
762         // problems for LLVM. This logic moves scalar parameters of the type
763         // matching the type of these scalars to a single buffer.
764         // TODO(dsharlet): Maybe this is Int(64) in some cases?
765         Type scalars_buffer_type = Int(32);
766         std::string scalars_buffer_name = "scalar_indices";
767         std::vector<Stmt> scalars_buffer_init;
768         for (auto i = c.vars.begin(); i != c.vars.end();) {
769             if (i->second == scalars_buffer_type) {
770                 int index = scalars_buffer_init.size();
771                 scalars_buffer_init.push_back(Store::make(scalars_buffer_name, Variable::make(scalars_buffer_type, i->first),
772                                                           index, Parameter(), const_true(), ModulusRemainder()));
773                 Expr replacement = Load::make(scalars_buffer_type, scalars_buffer_name, index, Buffer<>(),
774                                               Parameter(), const_true(), ModulusRemainder());
775                 body = LetStmt::make(i->first, replacement, body);
776 
777                 i = c.vars.erase(i);
778             } else {
779                 ++i;
780             }
781         }
782         if (!scalars_buffer_init.empty()) {
783             // If we put some scalars in the scalars buffer, add it to the closure.
784             Closure::Buffer scalars_buffer;
785             scalars_buffer.type = scalars_buffer_type;
786             scalars_buffer.dimensions = 1;
787             scalars_buffer.read = true;
788             scalars_buffer.write = false;
789             c.buffers[scalars_buffer_name] = scalars_buffer;
790         }
791         int scalars_buffer_extent = scalars_buffer_init.size();
792 
793         // Make an argument list, and generate a function in the
794         // device_code module. The hexagon runtime code expects
795         // the arguments to appear in the order of (input buffers,
796         // output buffers, input scalars).  Scalars must be last
797         // for the scalar arguments to shadow the symbols of the
798         // buffer that get generated by CodeGen_LLVM.
799         std::vector<LoweredArgument> input_buffers, output_buffers;
800         std::map<std::string, Parameter> replacement_params;
801         for (const auto &i : c.buffers) {
802             if (i.second.write) {
803                 Argument::Kind kind = Argument::OutputBuffer;
804                 output_buffers.emplace_back(i.first, kind, i.second.type, i.second.dimensions, ArgumentEstimates{});
805             } else {
806                 Argument::Kind kind = Argument::InputBuffer;
807                 input_buffers.emplace_back(i.first, kind, i.second.type, i.second.dimensions, ArgumentEstimates{});
808             }
809 
810             // Build a parameter to replace.
811             Parameter p(i.second.type, true, i.second.dimensions);
812             // Assert that buffers are aligned to one HVX vector.
813             const int alignment = 128;
814             p.set_host_alignment(alignment);
815             // The other parameter constraints are already
816             // accounted for by the closure grabbing those
817             // arguments, so we only need to provide the host
818             // alignment.
819             replacement_params[i.first] = p;
820 
821             // Add an assert to the body that validates the alignment of the
822             // buffer. These buffers are either allocated by FastRPC or
823             // halide_hexagon_device_interface buffers, either should be aligned
824             // to 128 bytes.
825             if (!device_code.target().has_feature(Target::NoAsserts)) {
826                 Expr host_ptr = reinterpret<uint64_t>(Variable::make(Handle(), i.first));
827                 Expr error = Call::make(Int(32), "halide_error_unaligned_host_ptr",
828                                         {i.first, alignment}, Call::Extern);
829                 body = Block::make(AssertStmt::make(host_ptr % alignment == 0, error), body);
830             }
831 
832             // Unpack buffer parameters into the scope. They come in as host/dev struct pairs.
833             Expr buf = Variable::make(Handle(), i.first + ".buffer");
834             Expr host_ptr = Call::make(Handle(), "_halide_hexagon_buffer_get_host", {buf}, Call::Extern);
835             Expr device_ptr = Call::make(Handle(), "_halide_hexagon_buffer_get_device", {buf}, Call::Extern);
836             body = LetStmt::make(i.first + ".device", device_ptr, body);
837             body = LetStmt::make(i.first, host_ptr, body);
838         }
839         body = replace_params(body, replacement_params);
840 
841         std::vector<LoweredArgument> args;
842         args.insert(args.end(), input_buffers.begin(), input_buffers.end());
843         args.insert(args.end(), output_buffers.begin(), output_buffers.end());
844         for (const auto &i : c.vars) {
845             LoweredArgument arg(i.first, Argument::InputScalar, i.second, 0, ArgumentEstimates{});
846             args.push_back(arg);
847         }
848         device_code.append(LoweredFunc(hex_name, args, body, LinkageType::ExternalPlusMetadata));
849 
850         // Generate a call to hexagon_device_run.
851         std::vector<Expr> arg_sizes;
852         std::vector<Expr> arg_ptrs;
853         std::vector<Expr> arg_flags;
854 
855         for (const auto &i : c.buffers) {
856             // Buffers are passed to the hexagon host runtime as just device
857             // handles (uint64) and host (uint8*) fields. They correspond
858             // to the 'hexagon_device_pointer' struct declared elsewhere;
859             // we don't use that struct here because it's simple enough that
860             // just using `make_struct`() for it is simpler.
861             if (i.first != scalars_buffer_name) {
862                 // If this isn't the scalars buffer, assume it has a '.buffer'
863                 // description in the IR.
864                 Expr buf = Variable::make(type_of<halide_buffer_t *>(), i.first + ".buffer");
865                 Expr device = Call::make(UInt(64), Call::buffer_get_device, {buf}, Call::Extern);
866                 Expr host = Call::make(Handle(), Call::buffer_get_host, {buf}, Call::Extern);
867                 Expr pseudo_buffer = Call::make(Handle(), Call::make_struct, {device, host}, Call::Intrinsic);
868                 arg_ptrs.push_back(pseudo_buffer);
869                 arg_sizes.emplace_back((uint64_t)(pseudo_buffer.type().bytes()));
870             } else {
                // If this is the scalars buffer, it doesn't have a .buffer
                // field. Rather than make one, it's easier to just skip the
                // buffer_get_host call and reference the allocation directly.
874                 // TODO: This is a bit of an ugly hack, it would be nice to find
875                 // a better way to identify buffers without a '.buffer' description.
876                 Expr host = Variable::make(Handle(), i.first);
877                 Expr pseudo_buffer = Call::make(Handle(), Call::make_struct, {make_zero(UInt(64)), host}, Call::Intrinsic);
878                 arg_ptrs.push_back(pseudo_buffer);
879                 arg_sizes.emplace_back((uint64_t)scalars_buffer_extent * scalars_buffer_type.bytes());
880             }
881 
882             // In the flags parameter, bit 0 set indicates the
883             // buffer is read, bit 1 set indicates the buffer is
884             // written. If neither are set, the argument is a scalar.
885             int flags = 0;
886             if (i.second.read) flags |= 0x1;
887             if (i.second.write) flags |= 0x2;
888             arg_flags.emplace_back(flags);
889         }
890         for (const auto &i : c.vars) {
891             Expr arg = Variable::make(i.second, i.first);
892             Expr arg_ptr = Call::make(type_of<void *>(), Call::make_struct, {arg}, Call::Intrinsic);
893             arg_sizes.emplace_back((uint64_t)i.second.bytes());
894             arg_ptrs.push_back(arg_ptr);
895             arg_flags.emplace_back(0x0);
896         }
897 
898         // The argument list is terminated with an argument of size 0.
899         arg_sizes.emplace_back((uint64_t)0);
900 
901         std::string pipeline_name = hex_name + "_argv";
902         std::vector<Expr> params;
903         params.push_back(module_state());
904         params.emplace_back(pipeline_name);
905         params.push_back(state_var_ptr(hex_name, type_of<int>()));
906         params.push_back(Call::make(type_of<uint64_t *>(), Call::make_struct, arg_sizes, Call::Intrinsic));
907         params.push_back(Call::make(type_of<void **>(), Call::make_struct, arg_ptrs, Call::Intrinsic));
908         params.push_back(Call::make(type_of<int *>(), Call::make_struct, arg_flags, Call::Intrinsic));
909 
910         Stmt offload_call = call_extern_and_assert("halide_hexagon_run", params);
911         if (!scalars_buffer_init.empty()) {
912             offload_call = Block::make(Block::make(scalars_buffer_init), offload_call);
913         }
914         offload_call = Allocate::make(scalars_buffer_name, scalars_buffer_type, MemoryType::Auto,
915                                       {Expr(scalars_buffer_extent)}, const_true(), offload_call);
916         return offload_call;
917     }
918 
919 public:
    // Construct an injector bound to `device_code`, the module that the
    // offloaded functions generated by this class are appended to.
    // NOTE(review): the member looks like it is stored as a reference, in
    // which case the module must outlive this object -- confirm against
    // the member declaration.
    InjectHexagonRpc(Module &device_code)
        : device_code(device_code) {
    }
923 
inject(Stmt s)924     Stmt inject(Stmt s) {
925         s = mutate(s);
926 
927         if (!device_code.functions().empty()) {
928             // Wrap the statement in calls to halide_initialize_kernels.
929             Expr runtime_buf_var = Variable::make(type_of<struct halide_buffer_t *>(), runtime_module_name + ".buffer");
930             Expr runtime_size = Call::make(Int(32), Call::buffer_get_extent, {runtime_buf_var, 0}, Call::Extern);
931             Expr runtime_ptr = Call::make(Handle(), Call::buffer_get_host, {runtime_buf_var}, Call::Extern);
932 
933             Expr code_buf_var = Variable::make(type_of<struct halide_buffer_t *>(), pipeline_module_name + ".buffer");
934             Expr code_size = Call::make(Int(32), Call::buffer_get_extent, {code_buf_var, 0}, Call::Extern);
935             Expr code_ptr = Call::make(Handle(), Call::buffer_get_host, {code_buf_var}, Call::Extern);
936             Stmt init_kernels = call_extern_and_assert("halide_hexagon_initialize_kernels",
937                                                        {module_state_ptr(), code_ptr, cast<uint64_t>(code_size), runtime_ptr, cast<uint64_t>(runtime_size)});
938             s = Block::make(init_kernels, s);
939         }
940 
941         // TODO: This can probably go away due to general debug info at the submodule compile level.
942         debug(1) << "Hexagon device code module: " << device_code << "\n";
943 
944         return s;
945     }
946 };
947 
948 }  // namespace
949 
inject_hexagon_rpc(Stmt s,const Target & host_target,Module & containing_module)950 Stmt inject_hexagon_rpc(Stmt s, const Target &host_target,
951                         Module &containing_module) {
952     // Make a new target for the device module.
953     Target target(Target::NoOS, Target::Hexagon, 32);
954     // There are two ways of offloading, on device and on host.
955     // In the former we have true QuRT available, while on the
956     // latter we simulate the Hexagon side code with a barebones
957     // Shim layer, ie. NO QURT!
958     if (host_target.arch == Target::ARM) {
959         target.os = Target::QuRT;
960     }
961 
962     // These feature flags are propagated from the host target to the
963     // device module.
964     //
965     // TODO: We'd like Target::Debug to be in this list too, but trunk
966     // llvm currently disagrees with hexagon clang as to what
967     // constitutes valid debug info.
968     static const Target::Feature shared_features[] = {
969         Target::Profile,
970         Target::NoAsserts,
971         Target::HVX_64,
972         Target::HVX_128,
973         Target::HVX_v62,
974         Target::HVX_v65,
975         Target::HVX_v66,
976         Target::DisableLLVMLoopOpt,
977     };
978     for (Target::Feature i : shared_features) {
979         if (host_target.has_feature(i)) {
980             target = target.with_feature(i);
981         }
982     }
983 
984     Module shared_runtime(runtime_module_name, target);
985     Module hexagon_module(pipeline_module_name, target.with_feature(Target::NoRuntime));
986     InjectHexagonRpc injector(hexagon_module);
987     s = injector.inject(s);
988 
989     if (!hexagon_module.functions().empty()) {
990         containing_module.append(hexagon_module);
991         containing_module.append(shared_runtime);
992     }
993 
994     return s;
995 }
996 
compile_module_to_hexagon_shared_object(const Module & device_code)997 Buffer<uint8_t> compile_module_to_hexagon_shared_object(const Module &device_code) {
998     llvm::LLVMContext context;
999     std::unique_ptr<llvm::Module> llvm_module(compile_module_to_llvm_module(device_code, context));
1000 
1001     // Write intermediate bitcode to disk if requested.
1002     // TODO: We really need something better than this. This won't
1003     // work in non-trivial JIT or AOT programs.
1004     std::string bitcode_dump_path = get_env_variable("HL_HEXAGON_DUMP_BITCODE");
1005     if (!bitcode_dump_path.empty()) {
1006         auto fd_ostream = make_raw_fd_ostream(bitcode_dump_path);
1007         compile_llvm_module_to_llvm_bitcode(*llvm_module, *fd_ostream);
1008         debug(0) << "Wrote Hexagon device bitcode to " << bitcode_dump_path;
1009     }
1010 
1011     llvm::SmallVector<char, 4096> object;
1012     llvm::raw_svector_ostream object_stream(object);
1013     compile_llvm_module_to_object(*llvm_module, object_stream);
1014 
1015     int min_debug_level = device_code.name() == runtime_module_name ? 3 : 2;
1016     if (debug::debug_level() >= min_debug_level) {
1017         debug(0) << "Hexagon device code assembly: "
1018                  << "\n";
1019         llvm::SmallString<4096> assembly;
1020         llvm::raw_svector_ostream assembly_stream(assembly);
1021         compile_llvm_module_to_assembly(*llvm_module, assembly_stream);
1022         debug(0) << assembly.c_str() << "\n";
1023     }
1024 
1025     auto obj = Elf::Object::parse_object(object.data(), object.size());
1026     internal_assert(obj);
1027 
1028     // Generate just one .text section.
1029     obj->merge_text_sections();
1030 
1031     // Make .bss a real section.
1032     auto bss = obj->find_section(".bss");
1033     if (bss != obj->sections_end()) {
1034         bss->set_alignment(128);
1035         // TODO: We should set the type to SHT_NOBITS
1036         // This will cause a difference in MemSize and FileSize like so:
1037         //        FileSize = (MemSize - size_of_bss)
1038         // When the Hexagon loader is used on 8998 and later targets,
1039         // the difference is filled with zeroes thereby initializing the .bss
1040         // section.
1041         bss->set_type(Elf::Section::SHT_PROGBITS);
1042         std::fill(bss->contents_begin(), bss->contents_end(), 0);
1043     }
1044 
1045     auto dtors = obj->find_section(".dtors");
1046     if (dtors != obj->sections_end()) {
1047         dtors->append_contents((uint32_t)0);
1048     }
1049 
1050     // We call the constructors in ctors backwards starting from special
1051     // symbol __CTOR_END__ until we reach a 0 (NULL pointer value). So,
1052     // prepend the .ctors section with 0.
1053     auto ctors = obj->find_section(".ctors");
1054     if (ctors != obj->sections_end()) {
1055         ctors->prepend_contents((uint32_t)0);
1056     }
1057 
1058     debug(2) << print_sections(*obj);
1059 
1060     // Link into a shared object.
1061     std::string soname = "lib" + device_code.name() + ".so";
1062     Elf::HexagonLinker linker(device_code.target());
1063     std::vector<std::string> dependencies = {
1064         "libhalide_hexagon_remote_skel.so",
1065     };
1066     std::vector<char> shared_object = obj->write_shared_object(&linker, dependencies, soname);
1067 
1068     std::string signer = get_env_variable("HL_HEXAGON_CODE_SIGNER");
1069     if (!signer.empty()) {
1070         // If signer is specified, shell out to a tool/script that will
1071         // sign the Hexagon code in a specific way. The tool is expected
1072         // to be of the form
1073         //
1074         //     signer /path/to/unsigned.so /path/to/signed.so
1075         //
1076         // where unsigned and signed paths must not be the same file.
1077         // If the signed file already exists, it will be overwritten.
1078 
1079         TemporaryFile input("hvx_unsigned", ".so");
1080         TemporaryFile output("hvx_signed", ".so");
1081 
1082         debug(1) << "Signing Hexagon code: " << input.pathname() << " -> " << output.pathname() << "\n";
1083 
1084         write_entire_file(input.pathname(), shared_object);
1085 
1086         debug(1) << "Signing tool: (" << signer << ")\n";
1087         std::string cmd = signer + " " + input.pathname() + " " + output.pathname();
1088         int result = system(cmd.c_str());
1089         internal_assert(result == 0)
1090             << "HL_HEXAGON_CODE_SIGNER failed: result = " << result
1091             << " for cmd (" << cmd << ")";
1092 
1093         shared_object = read_entire_file(output.pathname());
1094     }
1095 
1096     Halide::Buffer<uint8_t> result_buf(shared_object.size(), device_code.name());
1097     memcpy(result_buf.data(), shared_object.data(), shared_object.size());
1098 
1099     return result_buf;
1100 }
1101 
1102 }  // namespace Internal
1103 }  // namespace Halide
1104