1 #include <iostream>
2 #include <memory>
3
4 #include "Closure.h"
5 #include "Elf.h"
6 #include "HexagonOffload.h"
7 #include "IRMutator.h"
8 #include "IROperator.h"
9 #include "InjectHostDevBufferCopies.h"
10 #include "LLVM_Headers.h"
11 #include "LLVM_Output.h"
12 #include "Param.h"
13 #include "Substitute.h"
14
15 namespace Halide {
16 namespace Internal {
17
18 using std::string;
19 using std::vector;
20
21 namespace Elf {
22
23 // Most of these constants were duplicated from LLVM's object parser code.
// ELF version constant; this is the value HexagonLinker::get_version() reports.
enum { EV_CURRENT = 1 };
27
// ELF machine identifier for Hexagon; this is the value
// HexagonLinker::get_machine() reports.
enum { EM_HEXAGON = 164 };
31
32 // http://llvm.org/docs/doxygen/html/Support_2ELF_8h_source.html#l00558
// Hexagon machine-architecture values. HexagonLinker picks one of these
// (based on the target's HVX feature flags) as its ELF flags word.
enum {
    // Early architecture revisions, numbered sequentially.
    EF_HEXAGON_MACH_V2 = 0x1,
    EF_HEXAGON_MACH_V3 = 0x2,
    EF_HEXAGON_MACH_V4 = 0x3,
    EF_HEXAGON_MACH_V5 = 0x4,
    EF_HEXAGON_MACH_V55 = 0x5,

    // Later revisions, where the value spells the version number in hex.
    EF_HEXAGON_MACH_V60 = 0x60,  // Deprecated
    EF_HEXAGON_MACH_V61 = 0x61,  // Deprecated?
    EF_HEXAGON_MACH_V62 = 0x62,
    EF_HEXAGON_MACH_V65 = 0x65,
    EF_HEXAGON_MACH_V66 = 0x66,
};
45
// Hexagon-specific dynamic section tag; HexagonLinker::append_dynamic()
// writes it, followed by its value, into the dynamic section.
enum { DT_HEXAGON_VER = 0x70000001 };
49
50 // https://llvm.org/svn/llvm-project/llvm/trunk/include/llvm/Support/ELFRelocs/Hexagon.def
// Hexagon ELF relocation type numbers. The numbering is contiguous from
// R_HEX_NONE (0) to R_HEX_LD_GOT_11_X (93).
enum {
    R_HEX_NONE = 0,
    R_HEX_B22_PCREL = 1,
    R_HEX_B15_PCREL = 2,
    R_HEX_B7_PCREL = 3,
    R_HEX_LO16 = 4,
    R_HEX_HI16 = 5,
    R_HEX_32 = 6,
    R_HEX_16 = 7,
    R_HEX_8 = 8,
    R_HEX_GPREL16_0 = 9,
    R_HEX_GPREL16_1 = 10,
    R_HEX_GPREL16_2 = 11,
    R_HEX_GPREL16_3 = 12,
    R_HEX_HL16 = 13,
    R_HEX_B13_PCREL = 14,
    R_HEX_B9_PCREL = 15,
    R_HEX_B32_PCREL_X = 16,
    R_HEX_32_6_X = 17,
    R_HEX_B22_PCREL_X = 18,
    R_HEX_B15_PCREL_X = 19,
    R_HEX_B13_PCREL_X = 20,
    R_HEX_B9_PCREL_X = 21,
    R_HEX_B7_PCREL_X = 22,
    R_HEX_16_X = 23,
    R_HEX_12_X = 24,
    R_HEX_11_X = 25,
    R_HEX_10_X = 26,
    R_HEX_9_X = 27,
    R_HEX_8_X = 28,
    R_HEX_7_X = 29,
    R_HEX_6_X = 30,
    R_HEX_32_PCREL = 31,
    R_HEX_COPY = 32,
    R_HEX_GLOB_DAT = 33,
    R_HEX_JMP_SLOT = 34,
    R_HEX_RELATIVE = 35,
    R_HEX_PLT_B22_PCREL = 36,
    R_HEX_GOTREL_LO16 = 37,
    R_HEX_GOTREL_HI16 = 38,
    R_HEX_GOTREL_32 = 39,
    R_HEX_GOT_LO16 = 40,
    R_HEX_GOT_HI16 = 41,
    R_HEX_GOT_32 = 42,
    R_HEX_GOT_16 = 43,
    R_HEX_DTPMOD_32 = 44,
    // Previously missing from this table, which left a hole at 45.
    R_HEX_DTPREL_LO16 = 45,
    R_HEX_DTPREL_HI16 = 46,
    R_HEX_DTPREL_32 = 47,
    R_HEX_DTPREL_16 = 48,
    R_HEX_GD_PLT_B22_PCREL = 49,
    R_HEX_GD_GOT_LO16 = 50,
    R_HEX_GD_GOT_HI16 = 51,
    R_HEX_GD_GOT_32 = 52,
    R_HEX_GD_GOT_16 = 53,
    R_HEX_IE_LO16 = 54,
    R_HEX_IE_HI16 = 55,
    R_HEX_IE_32 = 56,
    R_HEX_IE_GOT_LO16 = 57,
    R_HEX_IE_GOT_HI16 = 58,
    R_HEX_IE_GOT_32 = 59,
    R_HEX_IE_GOT_16 = 60,
    R_HEX_TPREL_LO16 = 61,
    R_HEX_TPREL_HI16 = 62,
    R_HEX_TPREL_32 = 63,
    R_HEX_TPREL_16 = 64,
    R_HEX_6_PCREL_X = 65,
    R_HEX_GOTREL_32_6_X = 66,
    R_HEX_GOTREL_16_X = 67,
    R_HEX_GOTREL_11_X = 68,
    R_HEX_GOT_32_6_X = 69,
    R_HEX_GOT_16_X = 70,
    R_HEX_GOT_11_X = 71,
    R_HEX_DTPREL_32_6_X = 72,
    R_HEX_DTPREL_16_X = 73,
    R_HEX_DTPREL_11_X = 74,
    R_HEX_GD_GOT_32_6_X = 75,
    R_HEX_GD_GOT_16_X = 76,
    R_HEX_GD_GOT_11_X = 77,
    R_HEX_IE_32_6_X = 78,
    R_HEX_IE_16_X = 79,
    R_HEX_IE_GOT_32_6_X = 80,
    R_HEX_IE_GOT_16_X = 81,
    R_HEX_IE_GOT_11_X = 82,
    R_HEX_TPREL_32_6_X = 83,
    R_HEX_TPREL_16_X = 84,
    R_HEX_TPREL_11_X = 85,
    R_HEX_LD_PLT_B22_PCREL = 86,
    R_HEX_LD_GOT_LO16 = 87,
    R_HEX_LD_GOT_HI16 = 88,
    R_HEX_LD_GOT_32 = 89,
    R_HEX_LD_GOT_16 = 90,
    R_HEX_LD_GOT_32_6_X = 91,
    R_HEX_LD_GOT_16_X = 92,
    R_HEX_LD_GOT_11_X = 93,
};
146
147 // This logic comes from support from Qualcomm.
maybe_branch_inst(uint32_t reloc_type)148 bool maybe_branch_inst(uint32_t reloc_type) {
149 switch (reloc_type) {
150 case R_HEX_PLT_B22_PCREL:
151 case R_HEX_B22_PCREL:
152 case R_HEX_B22_PCREL_X:
153 case R_HEX_B15_PCREL:
154 case R_HEX_B15_PCREL_X:
155 case R_HEX_B13_PCREL:
156 case R_HEX_B13_PCREL_X:
157 case R_HEX_B9_PCREL:
158 case R_HEX_B9_PCREL_X:
159 case R_HEX_B7_PCREL:
160 case R_HEX_B7_PCREL_X:
161 case R_HEX_B32_PCREL_X:
162 case R_HEX_32_PCREL:
163 case R_HEX_6_PCREL_X:
164
165 case R_HEX_LO16:
166 case R_HEX_HI16:
167 case R_HEX_16:
168 case R_HEX_8:
169 case R_HEX_32_6_X:
170 case R_HEX_16_X:
171 case R_HEX_12_X:
172 case R_HEX_11_X:
173 case R_HEX_10_X:
174 case R_HEX_9_X:
175 case R_HEX_8_X:
176 case R_HEX_7_X:
177 case R_HEX_6_X:
178 case R_HEX_32:
179 return true;
180 default:
181 return false;
182 }
183 }
184
// Formats x as "0x" followed by eight zero-padded, lowercase hex digits,
// e.g. 255 -> "0x000000ff".
std::string hex(uint32_t x) {
    // Room for "0x", eight digits and the terminating NUL.
    char text[2 + 8 + 1];
    snprintf(text, sizeof(text), "0x%08x", x);
    return std::string(text);
}
190
section_type_string(Section::Type type)191 std::string section_type_string(Section::Type type) {
192 switch (type) {
193 case Section::SHT_NULL:
194 return "SHT_NULL";
195 case Section::SHT_PROGBITS:
196 return "SHT_PROGBITS";
197 case Section::SHT_SYMTAB:
198 return "SHT_SYMTAB";
199 case Section::SHT_STRTAB:
200 return "SHT_STRTAB";
201 case Section::SHT_RELA:
202 return "SHT_RELA";
203 case Section::SHT_HASH:
204 return "SHT_HASH";
205 case Section::SHT_DYNAMIC:
206 return "SHT_DYNAMIC";
207 case Section::SHT_NOTE:
208 return "SHT_NOTE";
209 case Section::SHT_NOBITS:
210 return "SHT_NOBITS";
211 case Section::SHT_REL:
212 return "SHT_REL";
213 case Section::SHT_SHLIB:
214 return "SHT_SHLIB";
215 case Section::SHT_DYNSYM:
216 return "SHT_DYNSYM";
217 case Section::SHT_LOPROC:
218 return "SHT_LOPROC";
219 case Section::SHT_HIPROC:
220 return "SHT_HIPROC";
221 case Section::SHT_LOUSER:
222 return "SHT_LOUSER";
223 case Section::SHT_HIUSER:
224 return "SHT_HIUSER";
225 default:
226 return "UNKNOWN TYPE";
227 }
228 }
print_sections(const Object & obj)229 std::string print_sections(const Object &obj) {
230 std::ostringstream oss;
231 if (obj.sections_size() == 0) {
232 oss << "No sections in object\n";
233 return oss.str();
234 }
235 for (const Section &s : obj.sections()) {
236 oss << s.get_name() << ", Type = " << section_type_string(s.get_type()) << ", Size = " << hex(s.get_size()) << ", Alignment = " << s.get_alignment() << "\n";
237 }
238 return oss.str();
239 }
240
do_reloc(char * addr,uint32_t mask,uintptr_t val,bool is_signed,bool verify)241 void do_reloc(char *addr, uint32_t mask, uintptr_t val, bool is_signed, bool verify) {
242 uint32_t inst = *((uint32_t *)addr);
243 debug(4) << "Relocation in instruction: " << hex(inst) << "\n";
244 debug(4) << "val: " << hex(val) << "\n";
245 debug(4) << "mask: " << hex(mask) << "\n";
246
247 if (!mask) {
248 // The mask depends on the instruction. To implement
249 // relocations for new instructions see
250 // instruction_encodings.txt
251 // First print the bits so I can search for it in the
252 // instruction encodings.
253 debug(4) << "Instruction bits: ";
254 for (int i = 31; i >= 0; i--) {
255 debug(4) << (int)((inst >> i) & 1);
256 }
257 debug(4) << "\n";
258
259 if ((inst & (3 << 14)) == 0) {
260 // Some instructions are actually pairs of 16-bit
261 // subinstructions. See section 3.7 in the
262 // programmer's reference.
263 debug(4) << "Duplex!\n";
264
265 int iclass = ((inst >> 29) << 1) | ((inst >> 13) & 1);
266 debug(4) << "Class: " << hex(iclass) << "\n";
267 debug(4) << "Hi: ";
268 for (int i = 28; i >= 16; i--) {
269 debug(4) << (int)((inst >> i) & 1);
270 }
271 debug(4) << "\n";
272 debug(4) << "Lo: ";
273 for (int i = 12; i >= 0; i--) {
274 debug(4) << (int)((inst >> i) & 1);
275 }
276 debug(4) << "\n";
277
278 // We only know how to do the ones where the high
279 // subinstruction is an immediate assignment. (marked
280 // as A in table 9-4 in the programmer's reference
281 // manual).
282 internal_assert(iclass >= 3 && iclass <= 7);
283
284 // Pull out the subinstructions. They're the low 13
285 // bits of each half-word.
286 uint32_t hi = (inst >> 16) & ((1 << 13) - 1);
287 //uint32_t lo = inst & ((1 << 13) - 1);
288
289 // We only understand the ones where hi starts with 010
290 internal_assert((hi >> 10) == 2);
291
292 // Low 6 bits of val go in the following bits.
293 mask = 63 << 20;
294
295 } else if ((inst >> 24) == 72) {
296 // Example instruction encoding that has this high byte (ignoring bits 1 and 2):
297 // 0100 1ii0 000i iiii PPit tttt iiii iiii
298 debug(4) << "Instruction-specific case A\n";
299 mask = 0x061f20ff;
300 } else if ((inst >> 24) == 73) {
301 // 0100 1ii1 000i iiii PPii iiii iiid dddd
302 debug(4) << "Instruction-specific case B\n";
303 mask = 0x061f3fe0;
304 } else if ((inst >> 24) == 120) {
305 // 0111 1000 ii-i iiii PPii iiii iiid dddd
306 debug(4) << "Instruction-specific case C\n";
307 mask = 0x00df3fe0;
308 } else if ((inst >> 16) == 27209) {
309 // 0110 1010 0100 1001 PP-i iiii i--d dddd
310 mask = 0x00001f80;
311 } else if ((inst >> 25) == 72) {
312 // 1001 0ii0 101s ssss PPii iiii iiid dddd
313 // 1001 0ii1 000s ssss PPii iiii iiid dddd
314 mask = 0x06003fe0;
315 } else if ((inst >> 24) == 115 || (inst >> 24) == 124) {
316 // 0111 0011 -10sssss PP1iiiii iiiddddd
317 // 0111 0011 -11sssss PP1iiiii iiiddddd
318 // 0111 0011 0uusssss PP0iiiii iiiddddd
319 // 0111 0011 1uusssss PP0iiiii iiiddddd
320 // 0111 0011 -00sssss PP1iiiii iiiddddd
321 // 0111 0011 -01sssss PP1iiiii iiiddddd
322 // 0111 1100 0IIIIIII PPIiiiii iiiddddd
323 // 0111 0011 -11sssss PP1iiiii iiiddddd
324 mask = 0x00001fe0;
325
326 } else if ((inst >> 24) == 126) {
327 // 0111 1110 0uu0 iiii PP0i iiii iiid dddd
328 // 0111 1110 0uu0 iiii PP1i iiii iiid dddd
329 // 0111 1110 0uu1 iiii PP0i iiii iiid dddd
330 // 0111 1110 0uu1 iiii PP1i iiii iiid dddd
331 mask = 0x000f1fe0;
332 } else if ((inst >> 24) == 65 || (inst >> 24) == 77) {
333 // 0100 0001 000s ssss PP0t tiii iiid dddd
334 // 0100 0001 001s ssss PP0t tiii iiid dddd
335 // 0100 0001 010s ssss PP0t tiii iiid dddd
336 // 0100 0001 011s ssss PP0t tiii iiid dddd
337 // 0100 0001 100s ssss PP0t tiii iiid dddd
338 // 0100 0001 110s ssss PP0t tiii iiid dddd
339 // TODO: Add instructions to comment for mask 77.
340 mask = 0x000007e0;
341 } else if ((inst >> 21) == 540) {
342 // 0100 0011 100s ssss PP0t tiii iiid dddd
343 mask = 0x000007e0;
344 } else if ((inst >> 28) == 11) {
345 // 1011 iiii iiis ssss PPii iiii iiid dddd
346 mask = 0x0fe03fe0;
347 } else {
348 internal_error << "Unhandled instruction type! Instruction = " << inst << "\n";
349 }
350 }
351
352 uintptr_t old_val = val;
353 bool consumed_every_bit = false;
354 for (int i = 0; i < 32; i++) {
355 if (mask & (1 << i)) {
356 internal_assert((inst & (1 << i)) == 0);
357
358 // Consume a bit of val
359 int next_bit = val & 1;
360 if (is_signed) {
361 consumed_every_bit |= ((intptr_t)val) == -1;
362 val = ((intptr_t)val) >> 1;
363 } else {
364 val = ((uintptr_t)val) >> 1;
365 }
366 consumed_every_bit |= (val == 0);
367 inst |= (next_bit << i);
368 }
369 }
370
371 internal_assert(!verify || consumed_every_bit)
372 << "Relocation overflow inst=" << hex(inst)
373 << "mask=" << hex(mask) << " val=" << hex(old_val) << "\n";
374
375 debug(4) << "Relocated instruction: " << hex(inst) << "\n";
376
377 *((uint32_t *)addr) = inst;
378 }
379
do_relocation(uint32_t fixup_offset,char * fixup_addr,uint32_t type,const Symbol * sym,uint32_t sym_offset,int32_t addend,Elf::Section & got)380 void do_relocation(uint32_t fixup_offset, char *fixup_addr, uint32_t type,
381 const Symbol *sym, uint32_t sym_offset, int32_t addend,
382 Elf::Section &got) {
383 // Hexagon relocations are specified in section 11.5 in
384 // the Hexagon Application Binary Interface spec.
385
386 // Now we can define the variables from Table 11-5.
387 uint32_t S = sym_offset;
388 uint32_t P = fixup_offset;
389 intptr_t A = addend;
390 uint32_t GP = 0;
391
392 uint32_t G = got.contents_size();
393 for (const Relocation &r : got.relocations()) {
394 if (r.get_symbol() == sym) {
395 G = r.get_offset();
396 debug(2) << "Reusing G=" << G << " for symbol " << sym->get_name() << "\n";
397 break;
398 }
399 }
400
401 // Define some constants from table 11-3
402 const uint32_t Word32 = 0xffffffff;
403 const uint32_t Word16 = 0xffff;
404 const uint32_t Word8 = 0xff;
405 const uint32_t Word32_B22 = 0x01ff3ffe;
406 const uint32_t Word32_B15 = 0x00df20fe;
407 const uint32_t Word32_B13 = 0x00202ffe;
408 const uint32_t Word32_B9 = 0x003000fe;
409 const uint32_t Word32_B7 = 0x00001f18;
410 const uint32_t Word32_GP = 0; // The mask is instruction-specific
411 const uint32_t Word32_X26 = 0x0fff3fff;
412 const uint32_t Word32_U6 = 0; // The mask is instruction-specific
413 const uint32_t Word32_R6 = 0x000007e0;
414 const uint32_t Word32_LO = 0x00c03fff;
415 const bool truncate = false, verify = true;
416 const bool _unsigned = false, _signed = true;
417
418 bool needs_got_entry = false;
419
420 switch (type) {
421 case R_HEX_B22_PCREL:
422 do_reloc(fixup_addr, Word32_B22, intptr_t(S + A - P) >> 2, _signed, verify);
423 break;
424 case R_HEX_B15_PCREL:
425 // Untested
426 do_reloc(fixup_addr, Word32_B15, intptr_t(S + A - P) >> 2, _signed, verify);
427 break;
428 case R_HEX_B7_PCREL:
429 do_reloc(fixup_addr, Word32_B7, intptr_t(S + A - P) >> 2, _signed, verify);
430 break;
431 case R_HEX_LO16:
432 internal_error << "Not pic code " << type << "\n";
433 do_reloc(fixup_addr, Word32_LO, uintptr_t(S + A), _unsigned, truncate);
434 break;
435 case R_HEX_HI16:
436 internal_error << "Not pic code " << type << "\n";
437 do_reloc(fixup_addr, Word32_LO, uintptr_t(S + A) >> 16, _unsigned, truncate);
438 break;
439 case R_HEX_32:
440 internal_error << "Not pic code " << type << "\n";
441 do_reloc(fixup_addr, Word32, intptr_t(S + A), _unsigned, truncate);
442 break;
443 case R_HEX_16:
444 internal_error << "Not pic code " << type << "\n";
445 do_reloc(fixup_addr, Word16, uintptr_t(S + A), _unsigned, truncate);
446 break;
447 case R_HEX_8:
448 internal_error << "Not pic code " << type << "\n";
449 do_reloc(fixup_addr, Word8, uintptr_t(S + A), _unsigned, truncate);
450 break;
451 case R_HEX_GPREL16_0:
452 internal_error << "Not pic code " << type << "\n";
453 do_reloc(fixup_addr, Word32_GP, uintptr_t(S + A - GP), _unsigned, verify);
454 break;
455 case R_HEX_GPREL16_1:
456 internal_error << "Not pic code " << type << "\n";
457 do_reloc(fixup_addr, Word32_GP, uintptr_t(S + A - GP) >> 1, _unsigned, verify);
458 break;
459 case R_HEX_GPREL16_2:
460 internal_error << "Not pic code " << type << "\n";
461 do_reloc(fixup_addr, Word32_GP, uintptr_t(S + A - GP) >> 2, _unsigned, verify);
462 break;
463 case R_HEX_GPREL16_3:
464 internal_error << "Not pic code " << type << "\n";
465 do_reloc(fixup_addr, Word32_GP, uintptr_t(S + A - GP) >> 3, _unsigned, verify);
466 break;
467 case R_HEX_HL16:
468 internal_error << "Not pic code " << type << "\n";
469 do_reloc(fixup_addr, Word32_LO, uintptr_t(S + A) >> 16, _unsigned, truncate);
470 do_reloc(fixup_addr + 4, Word32_LO, uintptr_t(S + A), _unsigned, truncate);
471 break;
472 case R_HEX_B13_PCREL:
473 do_reloc(fixup_addr, Word32_B13, intptr_t(S + A - P) >> 2, _signed, verify);
474 break;
475 case R_HEX_B9_PCREL:
476 do_reloc(fixup_addr, Word32_B9, intptr_t(S + A - P) >> 2, _signed, verify);
477 break;
478 case R_HEX_B32_PCREL_X:
479 do_reloc(fixup_addr, Word32_X26, intptr_t(S + A - P) >> 6, _signed, truncate);
480 break;
481 case R_HEX_32_6_X:
482 internal_error << "Not pic code " << type << "\n";
483 do_reloc(fixup_addr, Word32_X26, uintptr_t(S + A) >> 6, _unsigned, verify);
484 break;
485 case R_HEX_B22_PCREL_X:
486 do_reloc(fixup_addr, Word32_B22, intptr_t(S + A - P) & 0x3f, _signed, verify);
487 break;
488 case R_HEX_B15_PCREL_X:
489 do_reloc(fixup_addr, Word32_B15, intptr_t(S + A - P) & 0x3f, _signed, verify);
490 break;
491 case R_HEX_B13_PCREL_X:
492 do_reloc(fixup_addr, Word32_B13, intptr_t(S + A - P) & 0x3f, _signed, verify);
493 break;
494 case R_HEX_B9_PCREL_X:
495 do_reloc(fixup_addr, Word32_B9, intptr_t(S + A - P) & 0x3f, _signed, verify);
496 break;
497 case R_HEX_B7_PCREL_X:
498 do_reloc(fixup_addr, Word32_B7, intptr_t(S + A - P) & 0x3f, _signed, verify);
499 break;
500 case R_HEX_16_X:
501 internal_error << "Not pic code " << type << "\n";
502 do_reloc(fixup_addr, Word32_U6, uintptr_t(S + A), _unsigned, truncate);
503 break;
504 case R_HEX_12_X:
505 internal_error << "Not pic code " << type << "\n";
506 do_reloc(fixup_addr, Word32_R6, uintptr_t(S + A), _unsigned, truncate);
507 break;
508 case R_HEX_11_X:
509 case R_HEX_10_X:
510 case R_HEX_9_X:
511 case R_HEX_8_X:
512 case R_HEX_7_X:
513 case R_HEX_6_X:
514 internal_error << "Not pic code " << type << "\n";
515 do_reloc(fixup_addr, Word32_U6, uintptr_t(S + A), _unsigned, truncate);
516 break;
517 case R_HEX_32_PCREL:
518 do_reloc(fixup_addr, Word32, intptr_t(S + A - P), _signed, verify);
519 break;
520 case R_HEX_6_PCREL_X:
521 do_reloc(fixup_addr, Word32_U6, uintptr_t(S + A - P), _unsigned, truncate);
522 break;
523 case R_HEX_GOT_32_6_X:
524 do_reloc(fixup_addr, Word32_X26, intptr_t(G) >> 6, _signed, truncate);
525 needs_got_entry = true;
526 break;
527 case R_HEX_GOT_16_X:
528 do_reloc(fixup_addr, Word32_U6, intptr_t(G), _signed, truncate);
529 needs_got_entry = true;
530 break;
531 case R_HEX_GOT_11_X:
532 do_reloc(fixup_addr, Word32_U6, uintptr_t(G), _unsigned, truncate);
533 needs_got_entry = true;
534 break;
535
536 default:
537 internal_error << "Unhandled relocation type " << type << "\n";
538 }
539
540 if (needs_got_entry && G == got.contents_size()) {
541 debug(2) << "Adding GOT entry " << G << " for symbol " << sym->get_name() << "\n";
542 got.append_contents((uint32_t)0);
543 got.add_relocation(Relocation(R_HEX_GLOB_DAT, G, 0, sym));
544 }
545 }
546
547 class HexagonLinker : public Linker {
548 public:
549 uint32_t flags;
550
HexagonLinker(const Target & target)551 HexagonLinker(const Target &target) {
552 if (target.has_feature(Target::HVX_v66)) {
553 flags = Elf::EF_HEXAGON_MACH_V66;
554 } else if (target.has_feature(Target::HVX_v65)) {
555 flags = Elf::EF_HEXAGON_MACH_V65;
556 } else {
557 flags = Elf::EF_HEXAGON_MACH_V62;
558 }
559 }
560
get_machine()561 uint16_t get_machine() override {
562 return EM_HEXAGON;
563 }
get_flags()564 uint32_t get_flags() override {
565 return flags;
566 }
get_version()567 uint32_t get_version() override {
568 return EV_CURRENT;
569 }
append_dynamic(Section & dynamic)570 void append_dynamic(Section &dynamic) override {
571 dynamic.append_contents((uint32_t)DT_HEXAGON_VER);
572 dynamic.append_contents((uint32_t)0x3);
573 }
574
get_got_entry(Section & got,const Symbol & sym)575 uint64_t get_got_entry(Section &got, const Symbol &sym) override {
576 // Check if we already made a got entry for this symbol.
577 for (const Relocation &r : got.relocations()) {
578 if (r.get_symbol() == &sym && r.get_type() == R_HEX_GLOB_DAT) {
579 internal_assert(r.get_addend() == 0);
580 return r.get_offset();
581 }
582 }
583
584 uint64_t got_offset = got.contents_size();
585 got.append_contents((uint32_t)0);
586 got.add_relocation(Elf::Relocation(R_HEX_GLOB_DAT, got_offset, 0, &sym));
587 return got_offset;
588 }
589
needs_plt_entry(const Relocation & r)590 bool needs_plt_entry(const Relocation &r) override {
591 return maybe_branch_inst(r.get_type());
592 }
593
add_plt_entry(const Symbol & sym,Section & plt,Section & got,const Symbol & got_sym)594 Symbol add_plt_entry(const Symbol &sym, Section &plt, Section &got, const Symbol &got_sym) override {
595 if (got.contents_empty()) {
596 // The PLT hasn't been started, initialize it now.
597 plt.set_alignment(16);
598
599 std::vector<char> padding(64, (char)0);
600 // TODO: Make a .plt0 entry that supports lazy binding.
601 plt.set_contents(padding.begin(), padding.end());
602 }
603
604 static const uint8_t hexagon_plt1[] = {
605 0x00, 0x40, 0x00, 0x00, // { immext (#0) (Relocation:R_HEX_B32_PCREL_X)
606 0x0e, 0xc0, 0x49, 0x6a, // r14 = add (pc, ##GOTn@PCREL) } (Relocation:R_HEX_6_PCREL_X)
607 0x1c, 0xc0, 0x8e, 0x91, // r28 = memw (r14)
608 0x00, 0xc0, 0x9c, 0x52, // jumpr r28
609 };
610
611 debug(2) << "Adding PLT entry for symbol " << sym.get_name() << "\n";
612
613 // Add a GOT entry for this symbol.
614 uint64_t got_offset = got.contents_size();
615 got.append_contents((uint32_t)0);
616 got.add_relocation(Elf::Relocation(R_HEX_JMP_SLOT, got_offset, 0, &sym));
617
618 // Add the PLT code.
619 uint32_t plt_offset = plt.get_size();
620 plt.append_contents(hexagon_plt1, hexagon_plt1 + sizeof(hexagon_plt1));
621
622 plt.add_relocation(Relocation(R_HEX_B32_PCREL_X, plt_offset + 0, got_offset, &got_sym));
623 plt.add_relocation(Relocation(R_HEX_6_PCREL_X, plt_offset + 4, got_offset + 4, &got_sym));
624
625 // Make a symbol for the PLT entry.
626 Symbol plt_sym("plt_" + sym.get_name());
627 plt_sym
628 .set_type(Symbol::STT_FUNC)
629 .set_binding(Symbol::STB_LOCAL)
630 .define(&plt, plt_offset, sizeof(hexagon_plt1));
631
632 return plt_sym;
633 }
634
relocate(uint64_t fixup_offset,char * fixup_addr,uint64_t type,const Elf::Symbol * sym,uint64_t sym_offset,int64_t addend,Elf::Section & got)635 Relocation relocate(uint64_t fixup_offset, char *fixup_addr, uint64_t type,
636 const Elf::Symbol *sym, uint64_t sym_offset, int64_t addend,
637 Elf::Section &got) override {
638 if (type == R_HEX_32) {
639 // Don't do this relocation, generate a new R_HEX_RELATIVE relocation instead.
640 return Relocation(R_HEX_RELATIVE, fixup_offset, sym_offset + addend, nullptr);
641 }
642 do_relocation(fixup_offset, fixup_addr, type, sym, sym_offset, addend, got);
643 return Relocation();
644 }
645 };
646
647 } // namespace Elf
648
649 namespace {
650
651 const std::string runtime_module_name = "halide_shared_runtime";
652 const std::string pipeline_module_name = "halide_hexagon_code";
653
654 // Replace the parameter objects of loads/stores with a new parameter
655 // object.
656 class ReplaceParams : public IRMutator {
657 const std::map<std::string, Parameter> &replacements;
658
659 using IRMutator::visit;
660
visit(const Load * op)661 Expr visit(const Load *op) override {
662 auto i = replacements.find(op->name);
663 if (i != replacements.end()) {
664 return Load::make(op->type, op->name, mutate(op->index), op->image,
665 i->second, mutate(op->predicate), op->alignment);
666 } else {
667 return IRMutator::visit(op);
668 }
669 }
670
visit(const Store * op)671 Stmt visit(const Store *op) override {
672 auto i = replacements.find(op->name);
673 if (i != replacements.end()) {
674 return Store::make(op->name, mutate(op->value), mutate(op->index),
675 i->second, mutate(op->predicate), op->alignment);
676 } else {
677 return IRMutator::visit(op);
678 }
679 }
680
681 public:
ReplaceParams(const std::map<std::string,Parameter> & replacements)682 ReplaceParams(const std::map<std::string, Parameter> &replacements)
683 : replacements(replacements) {
684 }
685 };
686
// Returns s with the Parameter of every load/store of a buffer named in
// `replacements` swapped for the mapped Parameter.
Stmt replace_params(const Stmt &s, const std::map<std::string, Parameter> &replacements) {
    ReplaceParams replacer(replacements);
    return replacer.mutate(s);
}
690
691 class InjectHexagonRpc : public IRMutator {
692 std::map<std::string, Expr> state_bufs;
693
694 Module &device_code;
695
state_var(const std::string & name,Type type)696 Expr state_var(const std::string &name, Type type) {
697 return Let::make(name, state_var_ptr(name, type),
698 Load::make(type_of<void *>(), name, 0,
699 Buffer<>(), Parameter(), const_true(), ModulusRemainder()));
700 }
701
state_var_ptr(const std::string & name,Type type)702 Expr state_var_ptr(const std::string &name, Type type) {
703 Expr &buf = state_bufs[name];
704 if (!buf.defined()) {
705 auto storage = Buffer<void *>::make_scalar(name + "_buf");
706 storage() = nullptr;
707 buf = Variable::make(type_of<halide_buffer_t *>(), storage.name() + ".buffer", storage);
708 }
709 return Call::make(Handle(), Call::buffer_get_host, {buf}, Call::Extern);
710 }
711
module_state()712 Expr module_state() {
713 return state_var("hexagon_module_state", type_of<void *>());
714 }
715
module_state_ptr()716 Expr module_state_ptr() {
717 return state_var_ptr("hexagon_module_state", type_of<void *>());
718 }
719
720 // Create a Buffer containing the given buffer/size, and return an
721 // expression for a pointer to the first element.
buffer_ptr(const uint8_t * buffer,size_t size,const char * name)722 Expr buffer_ptr(const uint8_t *buffer, size_t size, const char *name) {
723 Buffer<uint8_t> code((int)size, name);
724 memcpy(code.data(), buffer, (int)size);
725 Expr buf = Variable::make(type_of<halide_buffer_t *>(), string(name) + ".buffer", code);
726 return Call::make(Handle(), Call::buffer_get_host, {buf}, Call::Extern);
727 }
728
729 using IRMutator::visit;
730
visit(const For * loop)731 Stmt visit(const For *loop) override {
732 if (loop->device_api != DeviceAPI::Hexagon) {
733 return IRMutator::visit(loop);
734 }
735
736 // Unrolling or loop partitioning might generate multiple
737 // loops with the same name, so we need to make them unique.
738 // There's a bit of a hack here: the offload_rpc. prefix is
739 // significant, it tells the Hexagon code generator to expect
740 // the arguments to be unpacked by the Hexagon remote-side RPC
741 // call, which doesn't work with standard buffers.
742 std::string hex_name = unique_name("offload_rpc." + loop->name);
743
744 // After moving this to Hexagon, it doesn't need to be marked
745 // Hexagon anymore.
746 Stmt body;
747 if (is_one(loop->extent)) {
748 body = LetStmt::make(loop->name, loop->min, loop->body);
749 } else {
750 body = For::make(loop->name, loop->min, loop->extent, loop->for_type,
751 DeviceAPI::None, loop->body);
752 }
753
754 // Build a closure for the device code.
755 // TODO: Should this move the body of the loop to Hexagon,
756 // or the loop itself? Currently, this moves the loop itself.
757 Closure c(body);
758
759 // A buffer parameter potentially generates 3 scalar parameters (min,
760 // extent, stride) per dimension. Pipelines with many buffers may
761 // generate extreme numbers of scalar parameters, which can cause
762 // problems for LLVM. This logic moves scalar parameters of the type
763 // matching the type of these scalars to a single buffer.
764 // TODO(dsharlet): Maybe this is Int(64) in some cases?
765 Type scalars_buffer_type = Int(32);
766 std::string scalars_buffer_name = "scalar_indices";
767 std::vector<Stmt> scalars_buffer_init;
768 for (auto i = c.vars.begin(); i != c.vars.end();) {
769 if (i->second == scalars_buffer_type) {
770 int index = scalars_buffer_init.size();
771 scalars_buffer_init.push_back(Store::make(scalars_buffer_name, Variable::make(scalars_buffer_type, i->first),
772 index, Parameter(), const_true(), ModulusRemainder()));
773 Expr replacement = Load::make(scalars_buffer_type, scalars_buffer_name, index, Buffer<>(),
774 Parameter(), const_true(), ModulusRemainder());
775 body = LetStmt::make(i->first, replacement, body);
776
777 i = c.vars.erase(i);
778 } else {
779 ++i;
780 }
781 }
782 if (!scalars_buffer_init.empty()) {
783 // If we put some scalars in the scalars buffer, add it to the closure.
784 Closure::Buffer scalars_buffer;
785 scalars_buffer.type = scalars_buffer_type;
786 scalars_buffer.dimensions = 1;
787 scalars_buffer.read = true;
788 scalars_buffer.write = false;
789 c.buffers[scalars_buffer_name] = scalars_buffer;
790 }
791 int scalars_buffer_extent = scalars_buffer_init.size();
792
793 // Make an argument list, and generate a function in the
794 // device_code module. The hexagon runtime code expects
795 // the arguments to appear in the order of (input buffers,
796 // output buffers, input scalars). Scalars must be last
797 // for the scalar arguments to shadow the symbols of the
798 // buffer that get generated by CodeGen_LLVM.
799 std::vector<LoweredArgument> input_buffers, output_buffers;
800 std::map<std::string, Parameter> replacement_params;
801 for (const auto &i : c.buffers) {
802 if (i.second.write) {
803 Argument::Kind kind = Argument::OutputBuffer;
804 output_buffers.emplace_back(i.first, kind, i.second.type, i.second.dimensions, ArgumentEstimates{});
805 } else {
806 Argument::Kind kind = Argument::InputBuffer;
807 input_buffers.emplace_back(i.first, kind, i.second.type, i.second.dimensions, ArgumentEstimates{});
808 }
809
810 // Build a parameter to replace.
811 Parameter p(i.second.type, true, i.second.dimensions);
812 // Assert that buffers are aligned to one HVX vector.
813 const int alignment = 128;
814 p.set_host_alignment(alignment);
815 // The other parameter constraints are already
816 // accounted for by the closure grabbing those
817 // arguments, so we only need to provide the host
818 // alignment.
819 replacement_params[i.first] = p;
820
821 // Add an assert to the body that validates the alignment of the
822 // buffer. These buffers are either allocated by FastRPC or
823 // halide_hexagon_device_interface buffers, either should be aligned
824 // to 128 bytes.
825 if (!device_code.target().has_feature(Target::NoAsserts)) {
826 Expr host_ptr = reinterpret<uint64_t>(Variable::make(Handle(), i.first));
827 Expr error = Call::make(Int(32), "halide_error_unaligned_host_ptr",
828 {i.first, alignment}, Call::Extern);
829 body = Block::make(AssertStmt::make(host_ptr % alignment == 0, error), body);
830 }
831
832 // Unpack buffer parameters into the scope. They come in as host/dev struct pairs.
833 Expr buf = Variable::make(Handle(), i.first + ".buffer");
834 Expr host_ptr = Call::make(Handle(), "_halide_hexagon_buffer_get_host", {buf}, Call::Extern);
835 Expr device_ptr = Call::make(Handle(), "_halide_hexagon_buffer_get_device", {buf}, Call::Extern);
836 body = LetStmt::make(i.first + ".device", device_ptr, body);
837 body = LetStmt::make(i.first, host_ptr, body);
838 }
839 body = replace_params(body, replacement_params);
840
841 std::vector<LoweredArgument> args;
842 args.insert(args.end(), input_buffers.begin(), input_buffers.end());
843 args.insert(args.end(), output_buffers.begin(), output_buffers.end());
844 for (const auto &i : c.vars) {
845 LoweredArgument arg(i.first, Argument::InputScalar, i.second, 0, ArgumentEstimates{});
846 args.push_back(arg);
847 }
848 device_code.append(LoweredFunc(hex_name, args, body, LinkageType::ExternalPlusMetadata));
849
850 // Generate a call to hexagon_device_run.
851 std::vector<Expr> arg_sizes;
852 std::vector<Expr> arg_ptrs;
853 std::vector<Expr> arg_flags;
854
855 for (const auto &i : c.buffers) {
856 // Buffers are passed to the hexagon host runtime as just device
857 // handles (uint64) and host (uint8*) fields. They correspond
858 // to the 'hexagon_device_pointer' struct declared elsewhere;
859 // we don't use that struct here because it's simple enough that
860 // just using `make_struct`() for it is simpler.
861 if (i.first != scalars_buffer_name) {
862 // If this isn't the scalars buffer, assume it has a '.buffer'
863 // description in the IR.
864 Expr buf = Variable::make(type_of<halide_buffer_t *>(), i.first + ".buffer");
865 Expr device = Call::make(UInt(64), Call::buffer_get_device, {buf}, Call::Extern);
866 Expr host = Call::make(Handle(), Call::buffer_get_host, {buf}, Call::Extern);
867 Expr pseudo_buffer = Call::make(Handle(), Call::make_struct, {device, host}, Call::Intrinsic);
868 arg_ptrs.push_back(pseudo_buffer);
869 arg_sizes.emplace_back((uint64_t)(pseudo_buffer.type().bytes()));
870 } else {
871 // If this is the scalars buffer, it doesn't have a .buffer
872 // field. Rather than make one, It's easier to just skip the
873 // buffer_get_host call and reference the allocation directly.
874 // TODO: This is a bit of an ugly hack, it would be nice to find
875 // a better way to identify buffers without a '.buffer' description.
876 Expr host = Variable::make(Handle(), i.first);
877 Expr pseudo_buffer = Call::make(Handle(), Call::make_struct, {make_zero(UInt(64)), host}, Call::Intrinsic);
878 arg_ptrs.push_back(pseudo_buffer);
879 arg_sizes.emplace_back((uint64_t)scalars_buffer_extent * scalars_buffer_type.bytes());
880 }
881
882 // In the flags parameter, bit 0 set indicates the
883 // buffer is read, bit 1 set indicates the buffer is
884 // written. If neither are set, the argument is a scalar.
885 int flags = 0;
886 if (i.second.read) flags |= 0x1;
887 if (i.second.write) flags |= 0x2;
888 arg_flags.emplace_back(flags);
889 }
890 for (const auto &i : c.vars) {
891 Expr arg = Variable::make(i.second, i.first);
892 Expr arg_ptr = Call::make(type_of<void *>(), Call::make_struct, {arg}, Call::Intrinsic);
893 arg_sizes.emplace_back((uint64_t)i.second.bytes());
894 arg_ptrs.push_back(arg_ptr);
895 arg_flags.emplace_back(0x0);
896 }
897
898 // The argument list is terminated with an argument of size 0.
899 arg_sizes.emplace_back((uint64_t)0);
900
901 std::string pipeline_name = hex_name + "_argv";
902 std::vector<Expr> params;
903 params.push_back(module_state());
904 params.emplace_back(pipeline_name);
905 params.push_back(state_var_ptr(hex_name, type_of<int>()));
906 params.push_back(Call::make(type_of<uint64_t *>(), Call::make_struct, arg_sizes, Call::Intrinsic));
907 params.push_back(Call::make(type_of<void **>(), Call::make_struct, arg_ptrs, Call::Intrinsic));
908 params.push_back(Call::make(type_of<int *>(), Call::make_struct, arg_flags, Call::Intrinsic));
909
910 Stmt offload_call = call_extern_and_assert("halide_hexagon_run", params);
911 if (!scalars_buffer_init.empty()) {
912 offload_call = Block::make(Block::make(scalars_buffer_init), offload_call);
913 }
914 offload_call = Allocate::make(scalars_buffer_name, scalars_buffer_type, MemoryType::Auto,
915 {Expr(scalars_buffer_extent)}, const_true(), offload_call);
916 return offload_call;
917 }
918
919 public:
InjectHexagonRpc(Module & device_code)920 InjectHexagonRpc(Module &device_code)
921 : device_code(device_code) {
922 }
923
inject(Stmt s)924 Stmt inject(Stmt s) {
925 s = mutate(s);
926
927 if (!device_code.functions().empty()) {
928 // Wrap the statement in calls to halide_initialize_kernels.
929 Expr runtime_buf_var = Variable::make(type_of<struct halide_buffer_t *>(), runtime_module_name + ".buffer");
930 Expr runtime_size = Call::make(Int(32), Call::buffer_get_extent, {runtime_buf_var, 0}, Call::Extern);
931 Expr runtime_ptr = Call::make(Handle(), Call::buffer_get_host, {runtime_buf_var}, Call::Extern);
932
933 Expr code_buf_var = Variable::make(type_of<struct halide_buffer_t *>(), pipeline_module_name + ".buffer");
934 Expr code_size = Call::make(Int(32), Call::buffer_get_extent, {code_buf_var, 0}, Call::Extern);
935 Expr code_ptr = Call::make(Handle(), Call::buffer_get_host, {code_buf_var}, Call::Extern);
936 Stmt init_kernels = call_extern_and_assert("halide_hexagon_initialize_kernels",
937 {module_state_ptr(), code_ptr, cast<uint64_t>(code_size), runtime_ptr, cast<uint64_t>(runtime_size)});
938 s = Block::make(init_kernels, s);
939 }
940
941 // TODO: This can probably go away due to general debug info at the submodule compile level.
942 debug(1) << "Hexagon device code module: " << device_code << "\n";
943
944 return s;
945 }
946 };
947
948 } // namespace
949
inject_hexagon_rpc(Stmt s,const Target & host_target,Module & containing_module)950 Stmt inject_hexagon_rpc(Stmt s, const Target &host_target,
951 Module &containing_module) {
952 // Make a new target for the device module.
953 Target target(Target::NoOS, Target::Hexagon, 32);
954 // There are two ways of offloading, on device and on host.
955 // In the former we have true QuRT available, while on the
956 // latter we simulate the Hexagon side code with a barebones
957 // Shim layer, ie. NO QURT!
958 if (host_target.arch == Target::ARM) {
959 target.os = Target::QuRT;
960 }
961
962 // These feature flags are propagated from the host target to the
963 // device module.
964 //
965 // TODO: We'd like Target::Debug to be in this list too, but trunk
966 // llvm currently disagrees with hexagon clang as to what
967 // constitutes valid debug info.
968 static const Target::Feature shared_features[] = {
969 Target::Profile,
970 Target::NoAsserts,
971 Target::HVX_64,
972 Target::HVX_128,
973 Target::HVX_v62,
974 Target::HVX_v65,
975 Target::HVX_v66,
976 Target::DisableLLVMLoopOpt,
977 };
978 for (Target::Feature i : shared_features) {
979 if (host_target.has_feature(i)) {
980 target = target.with_feature(i);
981 }
982 }
983
984 Module shared_runtime(runtime_module_name, target);
985 Module hexagon_module(pipeline_module_name, target.with_feature(Target::NoRuntime));
986 InjectHexagonRpc injector(hexagon_module);
987 s = injector.inject(s);
988
989 if (!hexagon_module.functions().empty()) {
990 containing_module.append(hexagon_module);
991 containing_module.append(shared_runtime);
992 }
993
994 return s;
995 }
996
compile_module_to_hexagon_shared_object(const Module & device_code)997 Buffer<uint8_t> compile_module_to_hexagon_shared_object(const Module &device_code) {
998 llvm::LLVMContext context;
999 std::unique_ptr<llvm::Module> llvm_module(compile_module_to_llvm_module(device_code, context));
1000
1001 // Write intermediate bitcode to disk if requested.
1002 // TODO: We really need something better than this. This won't
1003 // work in non-trivial JIT or AOT programs.
1004 std::string bitcode_dump_path = get_env_variable("HL_HEXAGON_DUMP_BITCODE");
1005 if (!bitcode_dump_path.empty()) {
1006 auto fd_ostream = make_raw_fd_ostream(bitcode_dump_path);
1007 compile_llvm_module_to_llvm_bitcode(*llvm_module, *fd_ostream);
1008 debug(0) << "Wrote Hexagon device bitcode to " << bitcode_dump_path;
1009 }
1010
1011 llvm::SmallVector<char, 4096> object;
1012 llvm::raw_svector_ostream object_stream(object);
1013 compile_llvm_module_to_object(*llvm_module, object_stream);
1014
1015 int min_debug_level = device_code.name() == runtime_module_name ? 3 : 2;
1016 if (debug::debug_level() >= min_debug_level) {
1017 debug(0) << "Hexagon device code assembly: "
1018 << "\n";
1019 llvm::SmallString<4096> assembly;
1020 llvm::raw_svector_ostream assembly_stream(assembly);
1021 compile_llvm_module_to_assembly(*llvm_module, assembly_stream);
1022 debug(0) << assembly.c_str() << "\n";
1023 }
1024
1025 auto obj = Elf::Object::parse_object(object.data(), object.size());
1026 internal_assert(obj);
1027
1028 // Generate just one .text section.
1029 obj->merge_text_sections();
1030
1031 // Make .bss a real section.
1032 auto bss = obj->find_section(".bss");
1033 if (bss != obj->sections_end()) {
1034 bss->set_alignment(128);
1035 // TODO: We should set the type to SHT_NOBITS
1036 // This will cause a difference in MemSize and FileSize like so:
1037 // FileSize = (MemSize - size_of_bss)
1038 // When the Hexagon loader is used on 8998 and later targets,
1039 // the difference is filled with zeroes thereby initializing the .bss
1040 // section.
1041 bss->set_type(Elf::Section::SHT_PROGBITS);
1042 std::fill(bss->contents_begin(), bss->contents_end(), 0);
1043 }
1044
1045 auto dtors = obj->find_section(".dtors");
1046 if (dtors != obj->sections_end()) {
1047 dtors->append_contents((uint32_t)0);
1048 }
1049
1050 // We call the constructors in ctors backwards starting from special
1051 // symbol __CTOR_END__ until we reach a 0 (NULL pointer value). So,
1052 // prepend the .ctors section with 0.
1053 auto ctors = obj->find_section(".ctors");
1054 if (ctors != obj->sections_end()) {
1055 ctors->prepend_contents((uint32_t)0);
1056 }
1057
1058 debug(2) << print_sections(*obj);
1059
1060 // Link into a shared object.
1061 std::string soname = "lib" + device_code.name() + ".so";
1062 Elf::HexagonLinker linker(device_code.target());
1063 std::vector<std::string> dependencies = {
1064 "libhalide_hexagon_remote_skel.so",
1065 };
1066 std::vector<char> shared_object = obj->write_shared_object(&linker, dependencies, soname);
1067
1068 std::string signer = get_env_variable("HL_HEXAGON_CODE_SIGNER");
1069 if (!signer.empty()) {
1070 // If signer is specified, shell out to a tool/script that will
1071 // sign the Hexagon code in a specific way. The tool is expected
1072 // to be of the form
1073 //
1074 // signer /path/to/unsigned.so /path/to/signed.so
1075 //
1076 // where unsigned and signed paths must not be the same file.
1077 // If the signed file already exists, it will be overwritten.
1078
1079 TemporaryFile input("hvx_unsigned", ".so");
1080 TemporaryFile output("hvx_signed", ".so");
1081
1082 debug(1) << "Signing Hexagon code: " << input.pathname() << " -> " << output.pathname() << "\n";
1083
1084 write_entire_file(input.pathname(), shared_object);
1085
1086 debug(1) << "Signing tool: (" << signer << ")\n";
1087 std::string cmd = signer + " " + input.pathname() + " " + output.pathname();
1088 int result = system(cmd.c_str());
1089 internal_assert(result == 0)
1090 << "HL_HEXAGON_CODE_SIGNER failed: result = " << result
1091 << " for cmd (" << cmd << ")";
1092
1093 shared_object = read_entire_file(output.pathname());
1094 }
1095
1096 Halide::Buffer<uint8_t> result_buf(shared_object.size(), device_code.name());
1097 memcpy(result_buf.data(), shared_object.data(), shared_object.size());
1098
1099 return result_buf;
1100 }
1101
1102 } // namespace Internal
1103 } // namespace Halide
1104