1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 #undef NDEBUG
6 #include <assert.h>
7 #include <cstring>
8 #include <cstdlib>
9 #include <cstdio>
10 #include "elfxx.h"
11 #include "mozilla/CheckedInt.h"
12 
// Version suffix appended to the names of the sections defined below.
#define ver "0"
// Section names assigned in this file: elfhack_data by ElfRelHack_Section and
// elfhack_text by ElfRelHackCode_Section.
#define elfhack_data ".elfhack.data.v" ver
#define elfhack_text ".elfhack.text.v" ver

// Fallback values for ARM relocation types, used only when the included
// headers do not already define them.
#ifndef R_ARM_V4BX
#  define R_ARM_V4BX 0x28
#endif
#ifndef R_ARM_CALL
#  define R_ARM_CALL 0x1c
#endif
#ifndef R_ARM_JUMP24
#  define R_ARM_JUMP24 0x1d
#endif
#ifndef R_ARM_THM_JUMP24
#  define R_ARM_THM_JUMP24 0x1e
#endif

// Directory under which ElfRelHackCode_Section looks for "inject/<arch>.o".
// It starts out null; presumably assigned elsewhere before use -- TODO confirm.
char* rundir = nullptr;
31 
// Minimal struct exposing a single `value` member of type T. Elf_Addr_Traits
// uses it to give the raw Elf32_Addr/Elf64_Addr types a named field.
template <typename T>
struct wrapped {
  T value;
};
36 
37 class Elf_Addr_Traits {
38  public:
39   typedef wrapped<Elf32_Addr> Type32;
40   typedef wrapped<Elf64_Addr> Type64;
41 
42   template <class endian, typename R, typename T>
swap(T & t,R & r)43   static inline void swap(T& t, R& r) {
44     r.value = endian::swap(t.value);
45   }
46 };
47 
48 typedef serializable<Elf_Addr_Traits> Elf_Addr;
49 
50 class Elf_RelHack_Traits {
51  public:
52   typedef Elf32_Rel Type32;
53   typedef Elf32_Rel Type64;
54 
55   template <class endian, typename R, typename T>
swap(T & t,R & r)56   static inline void swap(T& t, R& r) {
57     r.r_offset = endian::swap(t.r_offset);
58     r.r_info = endian::swap(t.r_info);
59   }
60 };
61 
62 typedef serializable<Elf_RelHack_Traits> Elf_RelHack;
63 
64 class ElfRelHack_Section : public ElfSection {
65  public:
ElfRelHack_Section(Elf_Shdr & s)66   ElfRelHack_Section(Elf_Shdr& s) : ElfSection(s, nullptr, nullptr) {
67     name = elfhack_data;
68   };
69 
serialize(std::ofstream & file,char ei_class,char ei_data)70   void serialize(std::ofstream& file, char ei_class, char ei_data) {
71     for (std::vector<Elf_RelHack>::iterator i = rels.begin(); i != rels.end();
72          ++i)
73       (*i).serialize(file, ei_class, ei_data);
74   }
75 
isRelocatable()76   bool isRelocatable() { return true; }
77 
push_back(Elf_RelHack & r)78   void push_back(Elf_RelHack& r) {
79     rels.push_back(r);
80     shdr.sh_size = rels.size() * shdr.sh_entsize;
81   }
82 
83  private:
84   std::vector<Elf_RelHack> rels;
85 };
86 
87 class ElfRelHackCode_Section : public ElfSection {
88  public:
ElfRelHackCode_Section(Elf_Shdr & s,Elf & e,ElfRelHack_Section & relhack_section,unsigned int init,unsigned int mprotect_cb,unsigned int sysconf_cb)89   ElfRelHackCode_Section(Elf_Shdr& s, Elf& e,
90                          ElfRelHack_Section& relhack_section, unsigned int init,
91                          unsigned int mprotect_cb, unsigned int sysconf_cb)
92       : ElfSection(s, nullptr, nullptr),
93         parent(e),
94         relhack_section(relhack_section),
95         init(init),
96         init_trampoline(nullptr),
97         mprotect_cb(mprotect_cb),
98         sysconf_cb(sysconf_cb) {
99     std::string file(rundir);
100     file += "/inject/";
101     switch (parent.getMachine()) {
102       case EM_386:
103         file += "x86";
104         break;
105       case EM_X86_64:
106         file += "x86_64";
107         break;
108       case EM_ARM:
109         file += "arm";
110         break;
111       default:
112         throw std::runtime_error("unsupported architecture");
113     }
114     file += ".o";
115     std::ifstream inject(file.c_str(), std::ios::in | std::ios::binary);
116     elf = new Elf(inject);
117     if (elf->getType() != ET_REL)
118       throw std::runtime_error("object for injected code is not ET_REL");
119     if (elf->getMachine() != parent.getMachine())
120       throw std::runtime_error(
121           "architecture of object for injected code doesn't match");
122 
123     ElfSymtab_Section* symtab = nullptr;
124 
125     // Find the symbol table.
126     for (ElfSection* section = elf->getSection(1); section != nullptr;
127          section = section->getNext()) {
128       if (section->getType() == SHT_SYMTAB)
129         symtab = (ElfSymtab_Section*)section;
130     }
131     if (symtab == nullptr)
132       throw std::runtime_error(
133           "Couldn't find a symbol table for the injected code");
134 
135     relro = parent.getSegmentByType(PT_GNU_RELRO);
136 
137     // Find the init symbol
138     entry_point = -1;
139     std::string symbol = "init";
140     if (!init) symbol += "_noinit";
141     if (relro) symbol += "_relro";
142     Elf_SymValue* sym = symtab->lookup(symbol.c_str());
143     if (!sym)
144       throw std::runtime_error(
145           "Couldn't find an 'init' symbol in the injected code");
146 
147     entry_point = sym->value.getValue();
148 
149     // Get all relevant sections from the injected code object.
150     add_code_section(sym->value.getSection());
151 
152     // If the original init function is located too far away, we're going to
153     // need to use a trampoline. See comment in inject.c.
154     // Theoretically, we should check for (init - instr) > 0xffffff, where instr
155     // is the virtual address of the instruction that calls the original init,
156     // but we don't have it at this point, so punt to just init.
157     if (init > 0xffffff && parent.getMachine() == EM_ARM) {
158       Elf_SymValue* trampoline = symtab->lookup("init_trampoline");
159       if (!trampoline) {
160         throw std::runtime_error(
161             "Couldn't find an 'init_trampoline' symbol in the injected code");
162       }
163 
164       init_trampoline = trampoline->value.getSection();
165       add_code_section(init_trampoline);
166     }
167 
168     // Adjust code sections offsets according to their size
169     std::vector<ElfSection*>::iterator c = code.begin();
170     (*c)->getShdr().sh_addr = 0;
171     for (ElfSection* last = *(c++); c != code.end(); ++c) {
172       unsigned int addr = last->getShdr().sh_addr + last->getSize();
173       if (addr & ((*c)->getAddrAlign() - 1))
174         addr = (addr | ((*c)->getAddrAlign() - 1)) + 1;
175       (*c)->getShdr().sh_addr = addr;
176       // We need to align this section depending on the greater
177       // alignment required by code sections.
178       if (shdr.sh_addralign < (*c)->getAddrAlign())
179         shdr.sh_addralign = (*c)->getAddrAlign();
180       last = *c;
181     }
182     shdr.sh_size = code.back()->getAddr() + code.back()->getSize();
183     data = static_cast<char*>(malloc(shdr.sh_size));
184     if (!data) {
185       throw std::runtime_error("Could not malloc ElfSection data");
186     }
187     char* buf = data;
188     for (c = code.begin(); c != code.end(); ++c) {
189       memcpy(buf, (*c)->getData(), (*c)->getSize());
190       buf += (*c)->getSize();
191     }
192     name = elfhack_text;
193   }
194 
~ElfRelHackCode_Section()195   ~ElfRelHackCode_Section() { delete elf; }
196 
serialize(std::ofstream & file,char ei_class,char ei_data)197   void serialize(std::ofstream& file, char ei_class, char ei_data) override {
198     // Readjust code offsets
199     for (std::vector<ElfSection*>::iterator c = code.begin(); c != code.end();
200          ++c)
201       (*c)->getShdr().sh_addr += getAddr();
202 
203     // Apply relocations
204     for (std::vector<ElfSection*>::iterator c = code.begin(); c != code.end();
205          ++c) {
206       for (ElfSection* rel = elf->getSection(1); rel != nullptr;
207            rel = rel->getNext())
208         if (((rel->getType() == SHT_REL) || (rel->getType() == SHT_RELA)) &&
209             (rel->getInfo().section == *c)) {
210           if (rel->getType() == SHT_REL)
211             apply_relocations((ElfRel_Section<Elf_Rel>*)rel, *c);
212           else
213             apply_relocations((ElfRel_Section<Elf_Rela>*)rel, *c);
214         }
215     }
216 
217     ElfSection::serialize(file, ei_class, ei_data);
218   }
219 
isRelocatable()220   bool isRelocatable() override { return false; }
221 
getEntryPoint()222   unsigned int getEntryPoint() { return entry_point; }
223 
insertBefore(ElfSection * section,bool dirty=true)224   void insertBefore(ElfSection* section, bool dirty = true) override {
225     // Adjust the address so that this section is adjacent to the one it's
226     // being inserted before. This avoids creating holes which subsequently
227     // might lead the PHDR-adjusting code to create unnecessary additional
228     // PT_LOADs.
229     shdr.sh_addr =
230         (section->getAddr() - shdr.sh_size) & ~(shdr.sh_addralign - 1);
231     ElfSection::insertBefore(section, dirty);
232   }
233 
234  private:
add_code_section(ElfSection * section)235   void add_code_section(ElfSection* section) {
236     if (section) {
237       /* Don't add section if it's already been added in the past */
238       for (auto s = code.begin(); s != code.end(); ++s) {
239         if (section == *s) return;
240       }
241       code.push_back(section);
242       find_code(section);
243     }
244   }
245 
246   /* Look at the relocations associated to the given section to find other
247    * sections that it requires */
find_code(ElfSection * section)248   void find_code(ElfSection* section) {
249     for (ElfSection* s = elf->getSection(1); s != nullptr; s = s->getNext()) {
250       if (((s->getType() == SHT_REL) || (s->getType() == SHT_RELA)) &&
251           (s->getInfo().section == section)) {
252         if (s->getType() == SHT_REL)
253           scan_relocs_for_code((ElfRel_Section<Elf_Rel>*)s);
254         else
255           scan_relocs_for_code((ElfRel_Section<Elf_Rela>*)s);
256       }
257     }
258   }
259 
260   template <typename Rel_Type>
scan_relocs_for_code(ElfRel_Section<Rel_Type> * rel)261   void scan_relocs_for_code(ElfRel_Section<Rel_Type>* rel) {
262     ElfSymtab_Section* symtab = (ElfSymtab_Section*)rel->getLink();
263     for (auto r = rel->rels.begin(); r != rel->rels.end(); ++r) {
264       ElfSection* section =
265           symtab->syms[ELF32_R_SYM(r->r_info)].value.getSection();
266       add_code_section(section);
267     }
268   }
269 
270   class pc32_relocation {
271    public:
operator ()(unsigned int base_addr,Elf32_Off offset,Elf32_Word addend,unsigned int addr)272     Elf32_Addr operator()(unsigned int base_addr, Elf32_Off offset,
273                           Elf32_Word addend, unsigned int addr) {
274       return addr + addend - offset - base_addr;
275     }
276   };
277 
278   class arm_plt32_relocation {
279    public:
operator ()(unsigned int base_addr,Elf32_Off offset,Elf32_Word addend,unsigned int addr)280     Elf32_Addr operator()(unsigned int base_addr, Elf32_Off offset,
281                           Elf32_Word addend, unsigned int addr) {
282       // We don't care about sign_extend because the only case where this is
283       // going to be used only jumps forward.
284       Elf32_Addr tmp = (Elf32_Addr)(addr - offset - base_addr) >> 2;
285       tmp = (addend + tmp) & 0x00ffffff;
286       return (addend & 0xff000000) | tmp;
287     }
288   };
289 
290   class arm_thm_jump24_relocation {
291    public:
operator ()(unsigned int base_addr,Elf32_Off offset,Elf32_Word addend,unsigned int addr)292     Elf32_Addr operator()(unsigned int base_addr, Elf32_Off offset,
293                           Elf32_Word addend, unsigned int addr) {
294       /* Follows description of b.w and bl instructions as per
295          ARM Architecture Reference Manual ARM® v7-A and ARM® v7-R edition,
296          A8.6.16 We limit ourselves to Encoding T4 of b.w and Encoding T1 of bl.
297          We don't care about sign_extend because the only case where this is
298          going to be used only jumps forward. */
299       Elf32_Addr tmp = (Elf32_Addr)(addr - offset - base_addr);
300       unsigned int word0 = addend & 0xffff, word1 = addend >> 16;
301 
302       /* Encoding T4 of B.W is 10x1 ; Encoding T1 of BL is 11x1. */
303       unsigned int type = (word1 & 0xd000) >> 12;
304       if (((word0 & 0xf800) != 0xf000) || ((type & 0x9) != 0x9))
305         throw std::runtime_error(
306             "R_ARM_THM_JUMP24/R_ARM_THM_CALL relocation only supported for B.W "
307             "<label> and BL <label>");
308 
309       /* When the target address points to ARM code, switch a BL to a
310        * BLX. This however can't be done with a B.W without adding a
311        * trampoline, which is not supported as of now. */
312       if ((addr & 0x1) == 0) {
313         if (type == 0x9)
314           throw std::runtime_error(
315               "R_ARM_THM_JUMP24/R_ARM_THM_CALL relocation only supported for "
316               "BL <label> when label points to ARM code");
317         /* The address of the target is always relative to a 4-bytes
318          * aligned address, so if the address of the BL instruction is
319          * not 4-bytes aligned, adjust for it. */
320         if ((base_addr + offset) & 0x2) tmp += 2;
321         /* Encoding T2 of BLX is 11x0. */
322         type = 0xc;
323       }
324 
325       unsigned int s = (word0 & (1 << 10)) >> 10;
326       unsigned int j1 = (word1 & (1 << 13)) >> 13;
327       unsigned int j2 = (word1 & (1 << 11)) >> 11;
328       unsigned int i1 = j1 ^ s ? 0 : 1;
329       unsigned int i2 = j2 ^ s ? 0 : 1;
330 
331       tmp += ((s << 24) | (i1 << 23) | (i2 << 22) | ((word0 & 0x3ff) << 12) |
332               ((word1 & 0x7ff) << 1));
333 
334       s = (tmp & (1 << 24)) >> 24;
335       j1 = ((tmp & (1 << 23)) >> 23) ^ !s;
336       j2 = ((tmp & (1 << 22)) >> 22) ^ !s;
337 
338       return 0xf000 | (s << 10) | ((tmp & (0x3ff << 12)) >> 12) | (type << 28) |
339              (j1 << 29) | (j2 << 27) | ((tmp & 0xffe) << 15);
340     }
341   };
342 
343   class gotoff_relocation {
344    public:
operator ()(unsigned int base_addr,Elf32_Off offset,Elf32_Word addend,unsigned int addr)345     Elf32_Addr operator()(unsigned int base_addr, Elf32_Off offset,
346                           Elf32_Word addend, unsigned int addr) {
347       return addr + addend;
348     }
349   };
350 
351   template <class relocation_type>
apply_relocation(ElfSection * the_code,char * base,Elf_Rel * r,unsigned int addr)352   void apply_relocation(ElfSection* the_code, char* base, Elf_Rel* r,
353                         unsigned int addr) {
354     relocation_type relocation;
355     Elf32_Addr value;
356     memcpy(&value, base + r->r_offset, 4);
357     value = relocation(the_code->getAddr(), r->r_offset, value, addr);
358     memcpy(base + r->r_offset, &value, 4);
359   }
360 
361   template <class relocation_type>
apply_relocation(ElfSection * the_code,char * base,Elf_Rela * r,unsigned int addr)362   void apply_relocation(ElfSection* the_code, char* base, Elf_Rela* r,
363                         unsigned int addr) {
364     relocation_type relocation;
365     Elf32_Addr value =
366         relocation(the_code->getAddr(), r->r_offset, r->r_addend, addr);
367     memcpy(base + r->r_offset, &value, 4);
368   }
369 
370   template <typename Rel_Type>
apply_relocations(ElfRel_Section<Rel_Type> * rel,ElfSection * the_code)371   void apply_relocations(ElfRel_Section<Rel_Type>* rel, ElfSection* the_code) {
372     assert(rel->getType() == Rel_Type::sh_type);
373     char* buf = data + (the_code->getAddr() - code.front()->getAddr());
374     // TODO: various checks on the sections
375     ElfSymtab_Section* symtab = (ElfSymtab_Section*)rel->getLink();
376     for (typename std::vector<Rel_Type>::iterator r = rel->rels.begin();
377          r != rel->rels.end(); ++r) {
378       // TODO: various checks on the symbol
379       const char* name = symtab->syms[ELF32_R_SYM(r->r_info)].name;
380       unsigned int addr;
381       if (symtab->syms[ELF32_R_SYM(r->r_info)].value.getSection() == nullptr) {
382         if (strcmp(name, "relhack") == 0) {
383           addr = relhack_section.getAddr();
384         } else if (strcmp(name, "elf_header") == 0) {
385           // TODO: change this ungly hack to something better
386           ElfSection* ehdr = parent.getSection(1)->getPrevious()->getPrevious();
387           addr = ehdr->getAddr();
388         } else if (strcmp(name, "original_init") == 0) {
389           if (init_trampoline) {
390             addr = init_trampoline->getAddr();
391           } else {
392             addr = init;
393           }
394         } else if (strcmp(name, "real_original_init") == 0) {
395           addr = init;
396         } else if (relro && strcmp(name, "mprotect_cb") == 0) {
397           addr = mprotect_cb;
398         } else if (relro && strcmp(name, "sysconf_cb") == 0) {
399           addr = sysconf_cb;
400         } else if (relro && strcmp(name, "relro_start") == 0) {
401           addr = relro->getAddr();
402         } else if (relro && strcmp(name, "relro_end") == 0) {
403           addr = (relro->getAddr() + relro->getMemSize());
404         } else if (strcmp(name, "_GLOBAL_OFFSET_TABLE_") == 0) {
405           // We actually don't need a GOT, but need it as a reference for
406           // GOTOFF relocations. We'll just use the start of the ELF file
407           addr = 0;
408         } else if (strcmp(name, "") == 0) {
409           // This is for R_ARM_V4BX, until we find something better
410           addr = -1;
411         } else {
412           throw std::runtime_error("Unsupported symbol in relocation");
413         }
414       } else {
415         ElfSection* section =
416             symtab->syms[ELF32_R_SYM(r->r_info)].value.getSection();
417         assert((section->getType() == SHT_PROGBITS) &&
418                (section->getFlags() & SHF_EXECINSTR));
419         addr = symtab->syms[ELF32_R_SYM(r->r_info)].value.getValue();
420       }
421       // Do the relocation
422 #define REL(machine, type) (EM_##machine | (R_##machine##_##type << 8))
423       switch (elf->getMachine() | (ELF32_R_TYPE(r->r_info) << 8)) {
424         case REL(X86_64, PC32):
425         case REL(X86_64, PLT32):
426         case REL(386, PC32):
427         case REL(386, GOTPC):
428         case REL(ARM, GOTPC):
429         case REL(ARM, REL32):
430           apply_relocation<pc32_relocation>(the_code, buf, &*r, addr);
431           break;
432         case REL(ARM, CALL):
433         case REL(ARM, JUMP24):
434         case REL(ARM, PLT32):
435           apply_relocation<arm_plt32_relocation>(the_code, buf, &*r, addr);
436           break;
437         case REL(ARM, THM_PC22 /* THM_CALL */):
438         case REL(ARM, THM_JUMP24):
439           apply_relocation<arm_thm_jump24_relocation>(the_code, buf, &*r, addr);
440           break;
441         case REL(386, GOTOFF):
442         case REL(ARM, GOTOFF):
443           apply_relocation<gotoff_relocation>(the_code, buf, &*r, addr);
444           break;
445         case REL(ARM, V4BX):
446           // Ignore R_ARM_V4BX relocations
447           break;
448         default:
449           throw std::runtime_error("Unsupported relocation type");
450       }
451     }
452   }
453 
454   Elf *elf, &parent;
455   ElfRelHack_Section& relhack_section;
456   std::vector<ElfSection*> code;
457   unsigned int init;
458   ElfSection* init_trampoline;
459   unsigned int mprotect_cb;
460   unsigned int sysconf_cb;
461   int entry_point;
462   ElfSegment* relro;
463 };
464 
get_addend(Elf_Rel * rel,Elf * elf)465 unsigned int get_addend(Elf_Rel* rel, Elf* elf) {
466   ElfLocation loc(rel->r_offset, elf);
467   Elf_Addr addr(loc.getBuffer(), Elf_Addr::size(elf->getClass()),
468                 elf->getClass(), elf->getData());
469   return addr.value;
470 }
471 
get_addend(Elf_Rela * rel,Elf * elf)472 unsigned int get_addend(Elf_Rela* rel, Elf* elf) { return rel->r_addend; }
473 
set_relative_reloc(Elf_Rel * rel,Elf * elf,unsigned int value)474 void set_relative_reloc(Elf_Rel* rel, Elf* elf, unsigned int value) {
475   ElfLocation loc(rel->r_offset, elf);
476   Elf_Addr addr;
477   addr.value = value;
478   addr.serialize(const_cast<char*>(loc.getBuffer()),
479                  Elf_Addr::size(elf->getClass()), elf->getClass(),
480                  elf->getData());
481 }
482 
set_relative_reloc(Elf_Rela * rel,Elf * elf,unsigned int value)483 void set_relative_reloc(Elf_Rela* rel, Elf* elf, unsigned int value) {
484   // ld puts the value of relocated relocations both in the addend and
485   // at r_offset. For consistency, keep it that way.
486   set_relative_reloc((Elf_Rel*)rel, elf, value);
487   rel->r_addend = value;
488 }
489 
maybe_split_segment(Elf * elf,ElfSegment * segment)490 void maybe_split_segment(Elf* elf, ElfSegment* segment) {
491   std::list<ElfSection*>::iterator it = segment->begin();
492   for (ElfSection* last = *(it++); it != segment->end(); last = *(it++)) {
493     // When two consecutive non-SHT_NOBITS sections are apart by more
494     // than the alignment of the section, the second can be moved closer
495     // to the first, but this requires the segment to be split.
496     if (((*it)->getType() != SHT_NOBITS) && (last->getType() != SHT_NOBITS) &&
497         ((*it)->getOffset() - last->getOffset() - last->getSize() >
498          segment->getAlign())) {
499       // Probably very wrong.
500       Elf_Phdr phdr;
501       phdr.p_type = PT_LOAD;
502       phdr.p_vaddr = 0;
503       phdr.p_paddr = phdr.p_vaddr + segment->getVPDiff();
504       phdr.p_flags = segment->getFlags();
505       phdr.p_align = segment->getAlign();
506       phdr.p_filesz = (unsigned int)-1;
507       phdr.p_memsz = (unsigned int)-1;
508       ElfSegment* newSegment = new ElfSegment(&phdr);
509       elf->insertSegmentAfter(segment, newSegment);
510       for (; it != segment->end(); ++it) {
511         newSegment->addSection(*it);
512       }
513       for (it = newSegment->begin(); it != newSegment->end(); ++it) {
514         segment->removeSection(*it);
515       }
516       break;
517     }
518   }
519 }
520 
// Pointer-encoding constants used when decoding .eh_frame entries.

// Special values.
static const char DW_EH_PE_absptr = 0x00;
static const char DW_EH_PE_omit = static_cast<char>(0xff);

// Data size, as returned by encoding_data_size().
static const char DW_EH_PE_LEB128 = 0x01;
static const char DW_EH_PE_data2 = 0x02;
static const char DW_EH_PE_data4 = 0x03;
static const char DW_EH_PE_data8 = 0x04;

// Flag set when the data is signed.
static const char DW_EH_PE_signed = 0x08;

// Modifier: the pointer is PC-relative.
static const char DW_EH_PE_pcrel = 0x10;
536 
// Extract the data size field (the three low bits) from a pointer encoding.
static char encoding_data_size(char encoding) {
  const char size_mask = 0x07;
  return encoding & size_mask;
}
539 
// Moves the buffer described by (`*data`, `*size`) forward by `step` bytes.
// When `step` exceeds the remaining size, nothing is modified and false is
// returned; otherwise the pointer and size are updated and true is returned.
static bool advance_buffer(char** data, size_t* size, size_t step) {
  const bool fits = step <= *size;
  if (fits) {
    *data += step;
    *size -= step;
  }
  return fits;
}
550 
// Advance in the given buffer, skipping the full length of the variable-length
// encoded LEB128 type in CIE/FDE data.
//
// Every byte consumed is accounted for in `*size`, including the final byte
// (the one without the continuation bit 0x80), so `*data` and `*size` stay in
// sync for the bounds checks that follow.
// Returns false when the buffer is empty or ends before the terminating byte
// of the value is found; true otherwise.
static bool skip_LEB128(char** data, size_t* size) {
  while (*size) {
    const unsigned char byte = static_cast<unsigned char>(*(*data)++);
    (*size)--;
    // A clear high bit marks the last byte of the encoded value.
    if ((byte & 0x80) == 0) return true;
  }
  return false;
}
561 
562 // Advance in the given buffer, skipping the full length of a pointer encoded
563 // with the given encoding.
skip_eh_frame_pointer(char ** data,size_t * size,char encoding)564 static bool skip_eh_frame_pointer(char** data, size_t* size, char encoding) {
565   switch (encoding_data_size(encoding)) {
566     case DW_EH_PE_data2:
567       return advance_buffer(data, size, 2);
568     case DW_EH_PE_data4:
569       return advance_buffer(data, size, 4);
570     case DW_EH_PE_data8:
571       return advance_buffer(data, size, 8);
572     case DW_EH_PE_LEB128:
573       return skip_LEB128(data, size);
574   }
575   throw std::runtime_error("unreachable");
576 }
577 
578 // Specialized implementations for adjust_eh_frame_pointer().
579 template <typename T>
adjust_eh_frame_sized_pointer(char ** data,size_t * size,ElfSection * eh_frame,unsigned int origAddr,Elf * elf)580 static bool adjust_eh_frame_sized_pointer(char** data, size_t* size,
581                                           ElfSection* eh_frame,
582                                           unsigned int origAddr, Elf* elf) {
583   if (*size < sizeof(T)) return false;
584 
585   serializable<FixedSizeData<T>> pointer(*data, *size, elf->getClass(),
586                                          elf->getData());
587   mozilla::CheckedInt<T> value = pointer.value;
588   if (origAddr < eh_frame->getAddr()) {
589     unsigned int diff = eh_frame->getAddr() - origAddr;
590     value -= diff;
591   } else {
592     unsigned int diff = origAddr - eh_frame->getAddr();
593     value += diff;
594   }
595   if (!value.isValid())
596     throw std::runtime_error("Overflow while adjusting eh_frame");
597   pointer.value = value.value();
598   pointer.serialize(*data, *size, elf->getClass(), elf->getData());
599   return advance_buffer(data, size, sizeof(T));
600 }
601 
602 // In the given eh_frame section, adjust the pointer with the given encoding,
603 // pointed to by the given buffer (`data`, `size`), considering the eh_frame
604 // section was originally at `origAddr`. Also advances in the buffer.
adjust_eh_frame_pointer(char ** data,size_t * size,char encoding,ElfSection * eh_frame,unsigned int origAddr,Elf * elf)605 static bool adjust_eh_frame_pointer(char** data, size_t* size, char encoding,
606                                     ElfSection* eh_frame, unsigned int origAddr,
607                                     Elf* elf) {
608   if ((encoding & 0x70) != DW_EH_PE_pcrel)
609     return skip_eh_frame_pointer(data, size, encoding);
610 
611   if (encoding & DW_EH_PE_signed) {
612     switch (encoding_data_size(encoding)) {
613       case DW_EH_PE_data2:
614         return adjust_eh_frame_sized_pointer<int16_t>(data, size, eh_frame,
615                                                       origAddr, elf);
616       case DW_EH_PE_data4:
617         return adjust_eh_frame_sized_pointer<int32_t>(data, size, eh_frame,
618                                                       origAddr, elf);
619       case DW_EH_PE_data8:
620         return adjust_eh_frame_sized_pointer<int64_t>(data, size, eh_frame,
621                                                       origAddr, elf);
622     }
623   } else {
624     switch (encoding_data_size(encoding)) {
625       case DW_EH_PE_data2:
626         return adjust_eh_frame_sized_pointer<uint16_t>(data, size, eh_frame,
627                                                        origAddr, elf);
628       case DW_EH_PE_data4:
629         return adjust_eh_frame_sized_pointer<uint32_t>(data, size, eh_frame,
630                                                        origAddr, elf);
631       case DW_EH_PE_data8:
632         return adjust_eh_frame_sized_pointer<uint64_t>(data, size, eh_frame,
633                                                        origAddr, elf);
634     }
635   }
636 
637   throw std::runtime_error("Unsupported eh_frame pointer encoding");
638 }
639 
640 // The eh_frame section may contain "PC"-relative pointers. If we move the
641 // section, those need to be adjusted. Other type of pointers are relative to
642 // sections we don't touch.
adjust_eh_frame(ElfSection * eh_frame,unsigned int origAddr,Elf * elf)643 static void adjust_eh_frame(ElfSection* eh_frame, unsigned int origAddr,
644                             Elf* elf) {
645   if (eh_frame->getAddr() == origAddr)  // nothing to do;
646     return;
647 
648   char* data = const_cast<char*>(eh_frame->getData());
649   size_t size = eh_frame->getSize();
650   char LSDAencoding = DW_EH_PE_omit;
651   char FDEencoding = DW_EH_PE_absptr;
652   bool hasZ = false;
653 
654   // Decoding of eh_frame based on https://www.airs.com/blog/archives/460
655   while (size) {
656     if (size < sizeof(uint32_t)) goto malformed;
657 
658     serializable<FixedSizeData<uint32_t>> entryLength(
659         data, size, elf->getClass(), elf->getData());
660     if (!advance_buffer(&data, &size, sizeof(uint32_t))) goto malformed;
661 
662     char* cursor = data;
663     size_t length = entryLength.value;
664 
665     if (length == 0) {
666       continue;
667     }
668 
669     if (size < sizeof(uint32_t)) goto malformed;
670 
671     serializable<FixedSizeData<uint32_t>> id(data, size, elf->getClass(),
672                                              elf->getData());
673     if (!advance_buffer(&cursor, &length, sizeof(uint32_t))) goto malformed;
674 
675     if (id.value == 0) {
676       // This is a Common Information Entry
677       if (length < 2) goto malformed;
678       // Reset LSDA and FDE encodings, and hasZ for subsequent FDEs.
679       LSDAencoding = DW_EH_PE_omit;
680       FDEencoding = DW_EH_PE_absptr;
681       hasZ = false;
682       // CIE version. Should only be 1 or 3.
683       char version = *cursor++;
684       length--;
685       if (version != 1 && version != 3) {
686         throw std::runtime_error("Unsupported eh_frame version");
687       }
688       // NUL terminated string.
689       const char* augmentationString = cursor;
690       size_t l = strnlen(augmentationString, length - 1);
691       if (l == length - 1) goto malformed;
692       if (!advance_buffer(&cursor, &length, l + 1)) goto malformed;
693       // Skip code alignment factor (LEB128)
694       if (!skip_LEB128(&cursor, &length)) goto malformed;
695       // Skip data alignment factor (LEB128)
696       if (!skip_LEB128(&cursor, &length)) goto malformed;
697       // Skip return address register (single byte in CIE version 1, LEB128
698       // in CIE version 3)
699       if (version == 1) {
700         if (!advance_buffer(&cursor, &length, 1)) goto malformed;
701       } else {
702         if (!skip_LEB128(&cursor, &length)) goto malformed;
703       }
704       // Past this, it's data driven by the contents of the augmentation string.
705       for (size_t i = 0; i < l; i++) {
706         if (!length) goto malformed;
707         switch (augmentationString[i]) {
708           case 'z':
709             if (!skip_LEB128(&cursor, &length)) goto malformed;
710             hasZ = true;
711             break;
712           case 'L':
713             LSDAencoding = *cursor++;
714             length--;
715             break;
716           case 'R':
717             FDEencoding = *cursor++;
718             length--;
719             break;
720           case 'P': {
721             char encoding = *cursor++;
722             length--;
723             if (!adjust_eh_frame_pointer(&cursor, &length, encoding, eh_frame,
724                                          origAddr, elf))
725               goto malformed;
726           } break;
727           default:
728             goto malformed;
729         }
730       }
731     } else {
732       // This is a Frame Description Entry
733       // Starting address
734       if (!adjust_eh_frame_pointer(&cursor, &length, FDEencoding, eh_frame,
735                                    origAddr, elf))
736         goto malformed;
737 
738       if (LSDAencoding != DW_EH_PE_omit) {
739         // Skip number of bytes, same size as the starting address.
740         if (!skip_eh_frame_pointer(&cursor, &length, FDEencoding))
741           goto malformed;
742         if (hasZ) {
743           if (!skip_LEB128(&cursor, &length)) goto malformed;
744         }
745         // pointer to the LSDA.
746         if (!adjust_eh_frame_pointer(&cursor, &length, LSDAencoding, eh_frame,
747                                      origAddr, elf))
748           goto malformed;
749       }
750     }
751 
752     data += entryLength.value;
753     size -= entryLength.value;
754   }
755   return;
756 
757 malformed:
758   throw std::runtime_error("malformed .eh_frame");
759 }
760 
761 template <typename Rel_Type>
do_relocation_section(Elf * elf,unsigned int rel_type,unsigned int rel_type2,bool force)762 int do_relocation_section(Elf* elf, unsigned int rel_type,
763                           unsigned int rel_type2, bool force) {
764   ElfDynamic_Section* dyn = elf->getDynSection();
765   if (dyn == nullptr) {
766     fprintf(stderr, "Couldn't find SHT_DYNAMIC section\n");
767     return -1;
768   }
769 
770   ElfRel_Section<Rel_Type>* section =
771       (ElfRel_Section<Rel_Type>*)dyn->getSectionForType(Rel_Type::d_tag);
772   if (section == nullptr) {
773     fprintf(stderr, "No relocations\n");
774     return -1;
775   }
776   assert(section->getType() == Rel_Type::sh_type);
777 
778   Elf32_Shdr relhack32_section = {
779       0,
780       SHT_PROGBITS,
781       SHF_ALLOC,
782       0,
783       (Elf32_Off)-1,
784       0,
785       SHN_UNDEF,
786       0,
787       Elf_RelHack::size(elf->getClass()),
788       Elf_RelHack::size(elf->getClass())};  // TODO: sh_addralign should be an
789                                             // alignment, not size
790   Elf32_Shdr relhackcode32_section = {0,
791                                       SHT_PROGBITS,
792                                       SHF_ALLOC | SHF_EXECINSTR,
793                                       0,
794                                       (Elf32_Off)-1,
795                                       0,
796                                       SHN_UNDEF,
797                                       0,
798                                       1,
799                                       0};
800 
801   unsigned int entry_sz = Elf_Addr::size(elf->getClass());
802 
803   // The injected code needs to be executed before any init code in the
804   // binary. There are three possible cases:
805   // - The binary has no init code at all. In this case, we will add a
806   //   DT_INIT entry pointing to the injected code.
807   // - The binary has a DT_INIT entry. In this case, we will interpose:
808   //   we change DT_INIT to point to the injected code, and have the
809   //   injected code call the original DT_INIT entry point.
810   // - The binary has no DT_INIT entry, but has a DT_INIT_ARRAY. In this
811   //   case, we interpose as well, by replacing the first entry in the
812   //   array to point to the injected code, and have the injected code
813   //   call the original first entry.
814   // The binary may have .ctors instead of DT_INIT_ARRAY, for its init
815   // functions, but this falls into the second case above, since .ctors
816   // are actually run by DT_INIT code.
817   ElfValue* value = dyn->getValueForType(DT_INIT);
818   unsigned int original_init = value ? value->getValue() : 0;
819   ElfSection* init_array = nullptr;
820   if (!value || !value->getValue()) {
821     value = dyn->getValueForType(DT_INIT_ARRAYSZ);
822     if (value && value->getValue() >= entry_sz)
823       init_array = dyn->getSectionForType(DT_INIT_ARRAY);
824   }
825 
826   Elf_Shdr relhack_section(relhack32_section);
827   Elf_Shdr relhackcode_section(relhackcode32_section);
828   ElfRelHack_Section* relhack = new ElfRelHack_Section(relhack_section);
829 
830   ElfSymtab_Section* symtab = (ElfSymtab_Section*)section->getLink();
831   Elf_SymValue* sym = symtab->lookup("__cxa_pure_virtual");
832 
833   std::vector<Rel_Type> new_rels;
834   Elf_RelHack relhack_entry;
835   relhack_entry.r_offset = relhack_entry.r_info = 0;
836   std::vector<Rel_Type> init_array_relocs;
837   size_t init_array_insert = 0;
838   for (typename std::vector<Rel_Type>::iterator i = section->rels.begin();
839        i != section->rels.end(); ++i) {
840     // We don't need to keep R_*_NONE relocations
841     if (!ELF32_R_TYPE(i->r_info)) continue;
842     ElfLocation loc(i->r_offset, elf);
843     // __cxa_pure_virtual is a function used in vtables to point at pure
844     // virtual methods. The __cxa_pure_virtual function usually abort()s.
845     // These functions are however normally never called. In the case
846     // where they would, jumping to the null address instead of calling
847     // __cxa_pure_virtual is going to work just as well. So we can remove
848     // relocations for the __cxa_pure_virtual symbol and null out the
849     // content at the offset pointed by the relocation.
850     if (sym) {
851       if (sym->defined) {
852         // If we are statically linked to libstdc++, the
853         // __cxa_pure_virtual symbol is defined in our lib, and we
854         // have relative relocations (rel_type) for it.
855         if (ELF32_R_TYPE(i->r_info) == rel_type) {
856           Elf_Addr addr(loc.getBuffer(), entry_sz, elf->getClass(),
857                         elf->getData());
858           if (addr.value == sym->value.getValue()) {
859             memset((char*)loc.getBuffer(), 0, entry_sz);
860             continue;
861           }
862         }
863       } else {
864         // If we are dynamically linked to libstdc++, the
865         // __cxa_pure_virtual symbol is undefined in our lib, and we
866         // have absolute relocations (rel_type2) for it.
867         if ((ELF32_R_TYPE(i->r_info) == rel_type2) &&
868             (sym == &symtab->syms[ELF32_R_SYM(i->r_info)])) {
869           memset((char*)loc.getBuffer(), 0, entry_sz);
870           continue;
871         }
872       }
873     }
874     // Keep track of the relocations associated with the init_array section.
875     if (init_array && i->r_offset >= init_array->getAddr() &&
876         i->r_offset < init_array->getAddr() + init_array->getSize()) {
877       init_array_relocs.push_back(*i);
878       init_array_insert = new_rels.size();
879     } else if (!(loc.getSection()->getFlags() & SHF_WRITE) ||
880                (ELF32_R_TYPE(i->r_info) != rel_type)) {
881       // Don't pack relocations happening in non writable sections.
882       // Our injected code is likely not to be allowed to write there.
883       new_rels.push_back(*i);
884     } else {
885       // With Elf_Rel, the value pointed by the relocation offset is the addend.
886       // With Elf_Rela, the addend is in the relocation entry, but the elfhacked
887       // relocation info doesn't contain it. Elfhack relies on the value pointed
888       // by the relocation offset to also contain the addend. Which is true with
889       // BFD ld and gold, but not lld, which leaves that nulled out. So if that
890       // value is nulled out, we update it to the addend.
891       Elf_Addr addr(loc.getBuffer(), entry_sz, elf->getClass(), elf->getData());
892       unsigned int addend = get_addend(&*i, elf);
893       if (addr.value == 0) {
894         addr.value = addend;
895         addr.serialize(const_cast<char*>(loc.getBuffer()), entry_sz,
896                        elf->getClass(), elf->getData());
897       } else if (addr.value != addend) {
898         fprintf(stderr,
899                 "Relocation addend inconsistent with content. Skipping\n");
900         return -1;
901       }
902       if (i->r_offset ==
903           relhack_entry.r_offset + relhack_entry.r_info * entry_sz) {
904         relhack_entry.r_info++;
905       } else {
906         if (relhack_entry.r_offset) relhack->push_back(relhack_entry);
907         relhack_entry.r_offset = i->r_offset;
908         relhack_entry.r_info = 1;
909       }
910     }
911   }
912   if (relhack_entry.r_offset) relhack->push_back(relhack_entry);
913   // Last entry must be nullptr
914   relhack_entry.r_offset = relhack_entry.r_info = 0;
915   relhack->push_back(relhack_entry);
916 
917   if (init_array) {
918     // Some linkers create a DT_INIT_ARRAY section that, for all purposes,
919     // is empty: it only contains 0x0 or 0xffffffff pointers with no
920     // relocations. In some other cases, there can be null pointers with no
921     // relocations in the middle of the section. Example: crtend_so.o in the
922     // Android NDK contains a sized .init_array with a null pointer and no
923     // relocation, which ends up in all Android libraries, and in some cases it
924     // ends up in the middle of the final .init_array section. If we have such a
925     // reusable slot at the beginning of .init_array, we just use it. It we have
926     // one in the middle of .init_array, we slide its content to move the "hole"
927     // at the beginning and use it there (we need our injected code to run
928     // before any other). Otherwise, replace the first entry and keep the
929     // original pointer.
930     std::sort(init_array_relocs.begin(), init_array_relocs.end(),
931               [](Rel_Type& a, Rel_Type& b) { return a.r_offset < b.r_offset; });
932     size_t expected = init_array->getAddr();
933     const size_t zero = 0;
934     const size_t all = SIZE_MAX;
935     const char* data = init_array->getData();
936     size_t length = Elf_Addr::size(elf->getClass());
937     size_t off = 0;
938     for (; off < init_array_relocs.size(); off++) {
939       auto& r = init_array_relocs[off];
940       if (r.r_offset >= expected + length &&
941           (memcmp(data + off * length, &zero, length) == 0 ||
942            memcmp(data + off * length, &all, length) == 0)) {
943         // We found a hole, move the preceding entries.
944         while (off) {
945           auto& p = init_array_relocs[--off];
946           if (ELF32_R_TYPE(p.r_info) == rel_type) {
947             unsigned int addend = get_addend(&p, elf);
948             p.r_offset += length;
949             set_relative_reloc(&p, elf, addend);
950           } else {
951             fprintf(stderr,
952                     "Unsupported relocation type in DT_INIT_ARRAY. Skipping\n");
953             return -1;
954           }
955         }
956         break;
957       }
958       expected = r.r_offset + length;
959     }
960 
961     if (off == 0) {
962       // We either found a hole above, and can now use the first entry,
963       // or the init_array section is effectively empty (see further above)
964       // and we also can use the first entry.
965       // Either way, code further below will take care of actually setting
966       // the right r_info and r_added for the relocation.
967       Rel_Type rel;
968       rel.r_offset = init_array->getAddr();
969       init_array_relocs.insert(init_array_relocs.begin(), rel);
970     } else {
971       // Use relocated value of DT_INIT_ARRAY's first entry for the
972       // function to be called by the injected code.
973       auto& rel = init_array_relocs[0];
974       unsigned int addend = get_addend(&rel, elf);
975       if (ELF32_R_TYPE(rel.r_info) == rel_type) {
976         original_init = addend;
977       } else if (ELF32_R_TYPE(rel.r_info) == rel_type2) {
978         ElfSymtab_Section* symtab = (ElfSymtab_Section*)section->getLink();
979         original_init =
980             symtab->syms[ELF32_R_SYM(rel.r_info)].value.getValue() + addend;
981       } else {
982         fprintf(stderr,
983                 "Unsupported relocation type for DT_INIT_ARRAY's first entry. "
984                 "Skipping\n");
985         return -1;
986       }
987     }
988 
989     new_rels.insert(std::next(new_rels.begin(), init_array_insert),
990                     init_array_relocs.begin(), init_array_relocs.end());
991   }
992 
993   unsigned int mprotect_cb = 0;
994   unsigned int sysconf_cb = 0;
995   // If there is a relro segment, our injected code will run after the linker
996   // sets the corresponding pages read-only. We need to make our code change
997   // that to read-write before applying relocations, which means it needs to
998   // call mprotect. To do that, we need to find a reference to the mprotect
999   // symbol. In case the library already has one, we use that, but otherwise, we
1000   // add the symbol. Then the injected code needs to be able to call the
1001   // corresponding function, which means it needs access to a pointer to it. We
1002   // get such a pointer by making the linker apply a relocation for the symbol
1003   // at an address our code can read. The problem here is that there is not much
1004   // relocated space where we can put such a pointer, so we abuse the bss
1005   // section temporarily (it will be restored to a null value before any code
1006   // can actually use it)
1007   if (elf->getSegmentByType(PT_GNU_RELRO)) {
1008     ElfSection* gnu_versym = dyn->getSectionForType(DT_VERSYM);
1009     auto lookup = [&symtab, &gnu_versym](const char* symbol) {
1010       Elf_SymValue* sym_value = symtab->lookup(symbol, STT(FUNC));
1011       if (!sym_value) {
1012         symtab->syms.emplace_back();
1013         sym_value = &symtab->syms.back();
1014         symtab->grow(symtab->syms.size() * symtab->getEntSize());
1015         sym_value->name =
1016             ((ElfStrtab_Section*)symtab->getLink())->getStr(symbol);
1017         sym_value->info = ELF32_ST_INFO(STB_GLOBAL, STT_FUNC);
1018         sym_value->other = STV_DEFAULT;
1019         new (&sym_value->value) ElfLocation(nullptr, 0, ElfLocation::ABSOLUTE);
1020         sym_value->size = 0;
1021         sym_value->defined = false;
1022 
1023         // The DT_VERSYM data (in the .gnu.version section) has the same number
1024         // of entries as the symbols table. Since we added one entry there, we
1025         // need to add one entry here. Zeroes in the extra data means no version
1026         // for that symbol, which is the simplest thing to do.
1027         if (gnu_versym) {
1028           gnu_versym->grow(gnu_versym->getSize() + gnu_versym->getEntSize());
1029         }
1030       }
1031       return sym_value;
1032     };
1033 
1034     Elf_SymValue* mprotect = lookup("mprotect");
1035     Elf_SymValue* sysconf = lookup("sysconf");
1036 
1037     // Add relocations for the mprotect and sysconf symbols.
1038     auto add_relocation_to = [&new_rels, &symtab, rel_type2](
1039                                  Elf_SymValue* symbol, unsigned int location) {
1040       new_rels.emplace_back();
1041       Rel_Type& rel = new_rels.back();
1042       memset(&rel, 0, sizeof(rel));
1043       rel.r_info = ELF32_R_INFO(
1044           std::distance(symtab->syms.begin(),
1045                         std::vector<Elf_SymValue>::iterator(symbol)),
1046           rel_type2);
1047       rel.r_offset = location;
1048       return location;
1049     };
1050 
1051     // Find the beginning of the bss section, and use an aligned location in
1052     // there for the relocation.
1053     for (ElfSection* s = elf->getSection(1); s != nullptr; s = s->getNext()) {
1054       if (s->getType() != SHT_NOBITS ||
1055           (s->getFlags() & (SHF_TLS | SHF_WRITE)) != SHF_WRITE) {
1056         continue;
1057       }
1058       size_t ptr_size = Elf_Addr::size(elf->getClass());
1059       size_t usable_start = (s->getAddr() + ptr_size - 1) & ~(ptr_size - 1);
1060       size_t usable_end = (s->getAddr() + s->getSize()) & ~(ptr_size - 1);
1061       if (usable_end - usable_start >= 2 * ptr_size) {
1062         mprotect_cb = add_relocation_to(mprotect, usable_start);
1063         sysconf_cb = add_relocation_to(sysconf, usable_start + ptr_size);
1064         break;
1065       }
1066     }
1067 
1068     if (mprotect_cb == 0 || sysconf_cb == 0) {
1069       fprintf(stderr, "Couldn't find .bss. Skipping\n");
1070       return -1;
1071     }
1072   }
1073 
1074   size_t old_size = section->getSize();
1075 
1076   section->rels.assign(new_rels.begin(), new_rels.end());
1077   section->shrink(new_rels.size() * section->getEntSize());
1078 
1079   ElfRelHackCode_Section* relhackcode =
1080       new ElfRelHackCode_Section(relhackcode_section, *elf, *relhack,
1081                                  original_init, mprotect_cb, sysconf_cb);
1082   // Find the first executable section, and insert the relhack code before
1083   // that. The relhack data is inserted between .rel.dyn and .rel.plt.
1084   ElfSection* first_executable = nullptr;
1085   for (ElfSection* s = elf->getSection(1); s != nullptr; s = s->getNext()) {
1086     if (s->getFlags() & SHF_EXECINSTR) {
1087       first_executable = s;
1088       break;
1089     }
1090   }
1091 
1092   if (!first_executable) {
1093     fprintf(stderr, "Couldn't find executable section. Skipping\n");
1094     return -1;
1095   }
1096 
1097   relhack->insertBefore(section);
1098   relhackcode->insertBefore(first_executable);
1099 
1100   // Don't try further if we can't gain from the relocation section size change.
1101   // We account for the fact we're going to split the PT_LOAD before the
1102   // injected code section, so the overhead of the page alignment for section
1103   // needs to be accounted for.
1104   size_t align = first_executable->getSegmentByType(PT_LOAD)->getAlign();
1105   size_t new_size = relhack->getSize() + section->getSize() +
1106                     relhackcode->getSize() +
1107                     (relhackcode->getAddr() & (align - 1));
1108   if (!force && (new_size >= old_size || old_size - new_size < align)) {
1109     fprintf(stderr, "No gain. Skipping\n");
1110     return -1;
1111   }
1112 
1113   // .eh_frame/.eh_frame_hdr may be between the relocation sections and the
1114   // executable sections. When that happens, we may end up creating a separate
1115   // PT_LOAD for just both of them because they are not considered relocatable.
1116   // But they are, in fact, kind of relocatable, albeit with some manual work.
1117   // Which we'll do here.
1118   ElfSegment* eh_frame_segment = elf->getSegmentByType(PT_GNU_EH_FRAME);
1119   ElfSection* eh_frame_hdr =
1120       eh_frame_segment ? eh_frame_segment->getFirstSection() : nullptr;
1121   // The .eh_frame section usually follows the eh_frame_hdr section.
1122   ElfSection* eh_frame = eh_frame_hdr ? eh_frame_hdr->getNext() : nullptr;
1123   ElfSection* first = eh_frame_hdr;
1124   ElfSection* second = eh_frame;
1125   if (eh_frame && strcmp(eh_frame->getName(), ".eh_frame")) {
1126     // But sometimes it appears *before* the eh_frame_hdr section.
1127     eh_frame = eh_frame_hdr->getPrevious();
1128     first = eh_frame;
1129     second = eh_frame_hdr;
1130   }
1131   if (eh_frame_hdr && (!eh_frame || strcmp(eh_frame->getName(), ".eh_frame"))) {
1132     throw std::runtime_error(
1133         "Expected to find an .eh_frame section adjacent to .eh_frame_hdr");
1134   }
1135   if (eh_frame && first->getAddr() > relhack->getAddr() &&
1136       second->getAddr() < first_executable->getAddr()) {
1137     // The distance between both sections needs to be preserved because
1138     // eh_frame_hdr contains relative offsets to eh_frame. Well, they could be
1139     // relocated too, but it's not worth the effort for the few number of bytes
1140     // this would save.
1141     unsigned int distance = second->getAddr() - first->getAddr();
1142     unsigned int origAddr = eh_frame->getAddr();
1143     ElfSection* previous = first->getPrevious();
1144     first->getShdr().sh_addr = (previous->getAddr() + previous->getSize() +
1145                                 first->getAddrAlign() - 1) &
1146                                ~(first->getAddrAlign() - 1);
1147     second->getShdr().sh_addr =
1148         (first->getAddr() + std::min(first->getSize(), distance) +
1149          second->getAddrAlign() - 1) &
1150         ~(second->getAddrAlign() - 1);
1151     // Re-adjust to keep the original distance.
1152     // If the first section has a smaller alignment requirement than the second,
1153     // the second will be farther away, so we need to adjust the first.
1154     // If the second section has a smaller alignment requirement than the first,
1155     // it will already be at the right distance.
1156     first->getShdr().sh_addr = second->getAddr() - distance;
1157     assert(distance == second->getAddr() - first->getAddr());
1158     first->markDirty();
1159     adjust_eh_frame(eh_frame, origAddr, elf);
1160   }
1161 
1162   // Adjust PT_LOAD segments
1163   for (ElfSegment* segment = elf->getSegmentByType(PT_LOAD); segment;
1164        segment = elf->getSegmentByType(PT_LOAD, segment)) {
1165     maybe_split_segment(elf, segment);
1166   }
1167 
1168   // Ensure Elf sections will be at their final location.
1169   elf->normalize();
1170   ElfLocation* init =
1171       new ElfLocation(relhackcode, relhackcode->getEntryPoint());
1172   if (init_array) {
1173     // Adjust the first DT_INIT_ARRAY entry to point at the injected code
1174     // by transforming its relocation into a relative one pointing to the
1175     // address of the injected code.
1176     Rel_Type* rel = &section->rels[init_array_insert];
1177     rel->r_info = ELF32_R_INFO(0, rel_type);  // Set as a relative relocation
1178     set_relative_reloc(rel, elf, init->getValue());
1179   } else if (!dyn->setValueForType(DT_INIT, init)) {
1180     fprintf(stderr, "Can't grow .dynamic section to set DT_INIT. Skipping\n");
1181     return -1;
1182   }
1183   // TODO: adjust the value according to the remaining number of relative
1184   // relocations
1185   if (dyn->getValueForType(Rel_Type::d_tag_count))
1186     dyn->setValueForType(Rel_Type::d_tag_count, new ElfPlainValue(0));
1187 
1188   return 0;
1189 }
1190 
// Renames the file `name` to `name` + ".bak".
// Returns the result of rename(): 0 on success, non-zero on failure.
static inline int backup_file(const char* name) {
  const std::string backup_name = std::string(name) + ".bak";
  return rename(name, backup_name.c_str());
}
1196 
do_file(const char * name,bool backup=false,bool force=false)1197 void do_file(const char* name, bool backup = false, bool force = false) {
1198   std::ifstream file(name, std::ios::in | std::ios::binary);
1199   Elf elf(file);
1200   unsigned int size = elf.getSize();
1201   fprintf(stderr, "%s: ", name);
1202   if (elf.getType() != ET_DYN) {
1203     fprintf(stderr, "Not a shared object. Skipping\n");
1204     return;
1205   }
1206 
1207   for (ElfSection* section = elf.getSection(1); section != nullptr;
1208        section = section->getNext()) {
1209     if (section->getName() &&
1210         (strncmp(section->getName(), ".elfhack.", 9) == 0)) {
1211       fprintf(stderr, "Already elfhacked. Skipping\n");
1212       return;
1213     }
1214   }
1215 
1216   int exit = -1;
1217   switch (elf.getMachine()) {
1218     case EM_386:
1219       exit =
1220           do_relocation_section<Elf_Rel>(&elf, R_386_RELATIVE, R_386_32, force);
1221       break;
1222     case EM_X86_64:
1223       exit = do_relocation_section<Elf_Rela>(&elf, R_X86_64_RELATIVE,
1224                                              R_X86_64_64, force);
1225       break;
1226     case EM_ARM:
1227       exit = do_relocation_section<Elf_Rel>(&elf, R_ARM_RELATIVE, R_ARM_ABS32,
1228                                             force);
1229       break;
1230   }
1231   if (exit == 0) {
1232     if (!force && (elf.getSize() >= size)) {
1233       fprintf(stderr, "No gain. Skipping\n");
1234     } else if (backup && backup_file(name) != 0) {
1235       fprintf(stderr, "Couln't create backup file\n");
1236     } else {
1237       std::ofstream ofile(name,
1238                           std::ios::out | std::ios::binary | std::ios::trunc);
1239       elf.write(ofile);
1240       fprintf(stderr, "Reduced by %d bytes\n", size - elf.getSize());
1241     }
1242   }
1243 }
1244 
undo_file(const char * name,bool backup=false)1245 void undo_file(const char* name, bool backup = false) {
1246   std::ifstream file(name, std::ios::in | std::ios::binary);
1247   Elf elf(file);
1248   unsigned int size = elf.getSize();
1249   fprintf(stderr, "%s: ", name);
1250   if (elf.getType() != ET_DYN) {
1251     fprintf(stderr, "Not a shared object. Skipping\n");
1252     return;
1253   }
1254 
1255   ElfSection *data = nullptr, *text = nullptr;
1256   for (ElfSection* section = elf.getSection(1); section != nullptr;
1257        section = section->getNext()) {
1258     if (section->getName() && (strcmp(section->getName(), elfhack_data) == 0))
1259       data = section;
1260     if (section->getName() && (strcmp(section->getName(), elfhack_text) == 0))
1261       text = section;
1262   }
1263 
1264   if (!data || !text) {
1265     fprintf(stderr, "Not elfhacked. Skipping\n");
1266     return;
1267   }
1268 
1269   // When both elfhack sections are in the same segment, try to merge
1270   // the segment that contains them both and the following segment.
1271   // When the elfhack sections are in separate segments, try to merge
1272   // those segments.
1273   ElfSegment* first = data->getSegmentByType(PT_LOAD);
1274   ElfSegment* second = text->getSegmentByType(PT_LOAD);
1275   if (first == second) {
1276     second = elf.getSegmentByType(PT_LOAD, first);
1277   }
1278 
1279   // Only merge the segments when their flags match.
1280   if (second->getFlags() != first->getFlags()) {
1281     fprintf(stderr, "Couldn't merge PT_LOAD segments. Skipping\n");
1282     return;
1283   }
1284   // Move sections from the second PT_LOAD to the first, and remove the
1285   // second PT_LOAD segment.
1286   for (std::list<ElfSection*>::iterator section = second->begin();
1287        section != second->end(); ++section)
1288     first->addSection(*section);
1289 
1290   elf.removeSegment(second);
1291   elf.normalize();
1292 
1293   if (backup && backup_file(name) != 0) {
1294     fprintf(stderr, "Couln't create backup file\n");
1295   } else {
1296     std::ofstream ofile(name,
1297                         std::ios::out | std::ios::binary | std::ios::trunc);
1298     elf.write(ofile);
1299     fprintf(stderr, "Grown by %d bytes\n", elf.getSize() - size);
1300   }
1301 }
1302 
main(int argc,char * argv[])1303 int main(int argc, char* argv[]) {
1304   int arg;
1305   bool backup = false;
1306   bool force = false;
1307   bool revert = false;
1308   char* lastSlash = rindex(argv[0], '/');
1309   if (lastSlash != nullptr) rundir = strndup(argv[0], lastSlash - argv[0]);
1310   for (arg = 1; arg < argc; arg++) {
1311     if (strcmp(argv[arg], "-f") == 0)
1312       force = true;
1313     else if (strcmp(argv[arg], "-b") == 0)
1314       backup = true;
1315     else if (strcmp(argv[arg], "-r") == 0)
1316       revert = true;
1317     else if (revert) {
1318       undo_file(argv[arg], backup);
1319     } else
1320       do_file(argv[arg], backup, force);
1321   }
1322 
1323   free(rundir);
1324   return 0;
1325 }
1326