1#!/usr/bin/env python
2# Copyright (c) 2011 The Native Client Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""This takes two command-line arguments:
7      INFILE                  raw linked ELF file name
8      OUTFILE                 output file name
9
10It makes a copy of INFILE, and changes the ELF PHDR in place in the copy.
11Then it moves the copy to OUTFILE.
12
nacl_helper_bootstrap's large (~1G) bss segment could cause the kernel
to refuse to load the program because it didn't think there was enough
free memory in the system for so large an allocation of anonymous memory.

To avoid this problem, the bootstrap program no longer has a large
18bss.  Instead, it has a special ELF segment (i.e. PT_LOAD header) that
19specifies no memory access, and a large (~1G) mapping size from the file.
20This mapping is way off the end of the file, but the kernel doesn't mind
21that, and since it's all a file mapping, the kernel does not do its normal
22memory accounting for consuming a large amount of anonymous memory.
23
24Unfortunately, it's impossible to get the linker to produce exactly the
25right PT_LOAD header by itself.  Using a custom linker script, we get the
26layout exactly how we want it and a PT_LOAD header that is almost right.
27We then use a build-time helper program to munge one field of the PT_LOAD
28to make it exactly what we need.
29"""
30
31import argparse
32import ctypes
33import mmap
34import shutil
35import sys
36
37
class Error(Exception):
  """Base class for the failures this script raises and reports itself.

  main() catches this type, prints the message to stderr and returns 1.
  """
40
41
class ElfFormatError(Error):
  """Raised when the input does not look like a supported ELF file."""

  def __init__(self, message, offset=None):
    """Create the error, optionally tagging it with a file offset.

    Args:
      message: Human-readable description of the problem.
      offset: Optional integer byte offset of the offending data; when
        given, ' (offset=N)' is appended to the message.
    """
    if offset is None:
      text = message
    else:
      text = message + ' (offset=%d)' % (offset,)
    super(ElfFormatError, self).__init__(text)
48
49
class ElfStructMixIn(object):
  """Mix-in adding raw-byte conversion helpers to ctypes structures.

  Meant to be combined with a ctypes.Structure subclass: both methods rely
  on ctypes.sizeof() and ctypes.addressof() working on the instance.
  """

  def GetBytes(self):
    """Return a copy of this structure's in-memory representation.

    Returns:
      A byte string exactly ctypes.sizeof(self) bytes long.
    """
    # ctypes.string_at() is available on both Python 2 and Python 3, unlike
    # the buffer() builtin, which only exists on Python 2.
    return ctypes.string_at(ctypes.addressof(self), ctypes.sizeof(self))

  @classmethod
  def FromString(cls, bytes):
    """Build an instance from the leading bytes of a byte string.

    Args:
      bytes: Byte string holding at least ctypes.sizeof(cls) bytes; any
        trailing data is ignored.

    Returns:
      A new instance of cls whose memory is a copy of the first
      ctypes.sizeof(cls) bytes of the input.

    Raises:
      ValueError: if the input is shorter than the structure.
    """
    inst = cls()
    size = ctypes.sizeof(inst)
    # Checked explicitly rather than with assert: asserts are removed under
    # "python -O", and memmove would then copy from beyond the end of the
    # input.
    if len(bytes) < size:
      raise ValueError('Need at least %d bytes to fill %s, got %d' % (
          size, cls.__name__, len(bytes)))
    ctypes.memmove(ctypes.addressof(inst), bytes, size)
    return inst
61
62
class EHDRIdent(ctypes.Structure, ElfStructMixIn):
  """The first six bytes of an ELF file, as inspected by Elf.LoadMap.

  Only the identification bytes LoadMap needs are declared: the magic
  number, the file class and the data encoding.
  """
  _fields_ = [
      # Compared against Elf.ELF_MAGIC.
      ('ei_magic', ctypes.c_char * 4),
      # LoadMap maps 1 to the 32-bit layouts and 2 to the 64-bit layouts.
      ('ei_class', ctypes.c_byte),
      # LoadMap accepts only 1 (little-endian).
      ('ei_data', ctypes.c_byte),
  ]
69
70
# ctypes field layouts of the ELF file header, keyed by ELF class (32 or 64).
# The 'name' entry is the prefix Elf.Structure() uses when naming the class
# it generates.  The two layouts differ only in the width of e_entry, e_phoff
# and e_shoff, so both are produced from a single template.
#
# NOTE(review): the ELF specification calls the second field e_type; the
# 'c_type' spelling is kept unchanged because it is the attribute name
# exposed on the generated structure.
EHDR_FIELDS = dict(
    (elf_class, [
        ('e_ident', ctypes.c_byte * 16),
        ('c_type', ctypes.c_uint16),
        ('e_machine', ctypes.c_uint16),
        ('e_version', ctypes.c_uint32),
        ('e_entry', addr_type),
        ('e_phoff', addr_type),
        ('e_shoff', addr_type),
        ('e_flags', ctypes.c_uint32),
        ('e_ehsize', ctypes.c_uint16),
        ('e_phentsize', ctypes.c_uint16),
        ('e_phnum', ctypes.c_uint16),
        ('e_shentsize', ctypes.c_uint16),
        ('e_shnum', ctypes.c_uint16),
        ('e_shstrndx', ctypes.c_uint16),
    ])
    for elf_class, addr_type in ((32, ctypes.c_uint32),
                                 (64, ctypes.c_uint64)))
EHDR_FIELDS['name'] = 'EHDR'
108
# ctypes field layouts of an ELF program header, keyed by ELF class (32 or
# 64).  The 'name' entry is the prefix Elf.Structure() uses when naming the
# class it generates.  Field order differs between the two classes: the
# 64-bit layout has p_flags directly after p_type, while the 32-bit layout
# has it second to last.
PHDR_FIELDS = {'name': 'PHDR'}

# Every field of the 32-bit program header is a 32-bit word.
PHDR_FIELDS[32] = list(
    (field_name, ctypes.c_uint32)
    for field_name in ('p_type', 'p_offset', 'p_vaddr', 'p_paddr',
                       'p_filesz', 'p_memsz', 'p_flags', 'p_align'))

# The 64-bit program header starts with two 32-bit words, followed by six
# 64-bit words.
PHDR_FIELDS[64] = list(
    (field_name, ctypes.c_uint32)
    for field_name in ('p_type', 'p_flags'))
PHDR_FIELDS[64].extend(
    (field_name, ctypes.c_uint64)
    for field_name in ('p_offset', 'p_vaddr', 'p_paddr',
                       'p_filesz', 'p_memsz', 'p_align'))
134
135
def HexDump(value):
  """Render a sequence of byte values as upper-case hexadecimal digits.

  Args:
    value: A byte string, a bytearray, or any iterable of integers.

  Returns:
    A str with two hex digits per input byte and no separators.
  """
  # Iterating a byte string yields one-character strings on Python 2 (which
  # '%02X' rejects with TypeError) but integers on Python 3.  Going through
  # bytearray yields integers on both.
  if isinstance(value, bytes):
    value = bytearray(value)
  return ''.join('%02X' % (octet,) for octet in value)
138
139
class Elf(object):
  """Accessor for the headers of a little-endian ELF image.

  The image is held as a sized, sliceable byte map (RunMain passes an mmap
  of the file).  Header structures are copied out of the map by GetEhdr and
  GetPhdr, and a program header is written back in place by SetPhdr.
  """

  # A bytes literal: ctypes returns the value of a c_char array as a byte
  # string, which never compares equal to a text string on Python 3.
  ELF_MAGIC = b'\x7FELF'

  # p_type value that RunMain requires of the program header it rewrites.
  PT_LOAD = 1

  def __init__(self, elf_map, elf_class):
    """Wrap an image whose identification bytes were already checked.

    Args:
      elf_map: Sized, sliceable byte map of the whole file.  It must also
        support slice assignment for SetPhdr to work.
      elf_class: 32 or 64; selects which header field layouts are used.
    """
    self._elf_map = elf_map
    self._elf_class = elf_class

  @classmethod
  def LoadMap(cls, elf_map):
    """Validate the identification bytes of elf_map and wrap it.

    Args:
      elf_map: Sized, sliceable byte map of the whole file.

    Returns:
      An instance of cls for the 32-bit or 64-bit image.

    Raises:
      ElfFormatError: if the map is too short, lacks the ELF magic number,
        has an unhandled class, or is not little-endian.
    """
    ident_size = ctypes.sizeof(EHDRIdent)
    if len(elf_map) < ident_size:
      raise ElfFormatError('File too short for an ELF header (%d bytes)' %
                           (len(elf_map),), 0)
    # Slice only the bytes needed; an open-ended slice of an mmap copies the
    # rest of the file.
    elf_ehdr = EHDRIdent.FromString(elf_map[0:ident_size])
    if elf_ehdr.ei_magic != cls.ELF_MAGIC:
      # bytearray yields integers when iterated on both Python 2 and 3,
      # which is what the '%02X' formatting in HexDump requires.
      raise ElfFormatError('Missing ELF magic number (%s)' % (
                           HexDump(bytearray(elf_ehdr.ei_magic)),), 0)

    if elf_ehdr.ei_class == 1:
      elf_class = 32
    elif elf_ehdr.ei_class == 2:
      elf_class = 64
    else:
      raise ElfFormatError('Unhandled ELF "ei_class" (%d)' %
                           (elf_ehdr.ei_class,), 4)

    if elf_ehdr.ei_data != 1:
      raise ElfFormatError('Wrong endian "ei_data" (%d): expected 1 (little)' %
                           (elf_ehdr.ei_data,), 5)

    return cls(elf_map, elf_class)

  def Structure(self, fields):
    """Create the ctypes structure class for this image's ELF class.

    Args:
      fields: Mapping such as EHDR_FIELDS or PHDR_FIELDS, with a field list
        under the keys 32 and 64 and an optional 'name' entry.

    Returns:
      A new little-endian, byte-packed ctypes structure class (a fresh class
      is created on every call).
    """
    name = fields.get('name', 'AnonymousStructure')
    name = '%s%sLITTLE' % (name, self._elf_class)

    class Result(ctypes.LittleEndianStructure, ElfStructMixIn):
      _pack_ = 1
      _fields_ = fields[self._elf_class]
    Result.__name__ = name
    return Result

  def _ReadStruct(self, struct_type, offset):
    """Copy one struct_type instance out of the map at a byte offset.

    Raises:
      ElfFormatError: if the structure would extend past the end of the map.
    """
    end = offset + ctypes.sizeof(struct_type)
    if end > len(self._elf_map):
      raise ElfFormatError('Truncated ELF file: %s extends past the end '
                           '(file is %d bytes)' %
                           (struct_type.__name__, len(self._elf_map)), offset)
    return struct_type.FromString(self._elf_map[offset:end])

  def GetEhdr(self):
    """Return a copy of the ELF file header.

    Raises:
      ElfFormatError: if the file is too short to hold the header.
    """
    return self._ReadStruct(self.Structure(EHDR_FIELDS), 0)

  def _GetPhdrOffset(self, index):
    """Return the file offset of program header number index.

    Raises:
      IndexError: if index is negative or not less than e_phnum.
    """
    ehdr = self.GetEhdr()
    # Negative indices are rejected too: they would otherwise produce an
    # offset in front of the program header table.
    if index < 0 or index >= ehdr.e_phnum:
      raise IndexError('Index out of bounds (e_phnum=%d)' % (ehdr.e_phnum,))
    return ehdr.e_phoff + (ehdr.e_phentsize * index)

  def GetPhdr(self, index):
    """Return a copy of program header number index.

    Raises:
      IndexError: if index is out of range.
      ElfFormatError: if the header lies beyond the end of the file.
    """
    return self._ReadStruct(self.Structure(PHDR_FIELDS),
                            self._GetPhdrOffset(index))

  def SetPhdr(self, index, phdr):
    """Overwrite program header number index with the bytes of phdr.

    Args:
      index: Zero-based program header index.
      phdr: Structure providing GetBytes(), normally obtained from GetPhdr.

    Raises:
      IndexError: if index is out of range.
      ElfFormatError: if the header lies beyond the end of the file.
    """
    phdr_off = self._GetPhdrOffset(index)
    phdr_end = phdr_off + ctypes.sizeof(phdr)
    if phdr_end > len(self._elf_map):
      raise ElfFormatError('Truncated ELF file: program header %d extends '
                           'past the end (file is %d bytes)' %
                           (index, len(self._elf_map)), phdr_off)
    self._elf_map[phdr_off:phdr_end] = phdr.GetBytes()
197
198
def RunMain(args):
  """Copy an ELF file, setting one PT_LOAD header's p_filesz to its p_memsz.

  The input is copied to '<outfile>.tmp', edited there in place through an
  mmap, and then moved to the output path.  The temporary file is removed
  again if any step fails.

  Args:
    args: Command-line arguments, without the program name.

  Returns:
    0 on success.

  Raises:
    Error: if the input is empty, the selected program header does not
      exist, is not PT_LOAD, or already has a nonzero p_filesz.  Format
      problems reported by Elf.LoadMap are raised as its ElfFormatError
      subclass.
  """
  # Imported here so this function brings its own dependency into scope.
  import os

  parser = argparse.ArgumentParser()
  parser.add_argument('infile', metavar='PATH',
      help='The ELF binary to be read')
  parser.add_argument('outfile', metavar='PATH',
      help='The munged ELF binary to be written')
  parser.add_argument('-s', '--phdr_index', metavar='OFFSET', type=int,
                      default=2,
                      help='The zero-index program header to modify '
                           '(default=%(default)s)')
  args = parser.parse_args(args)

  # Copy the input file to a temporary file, so that we can write it in place.
  tmpfile = args.outfile + '.tmp'
  shutil.copy(args.infile, tmpfile)
  munged = False
  try:
    # mmap cannot map a zero-length file; report that as a normal error.
    if os.path.getsize(tmpfile) == 0:
      raise Error('Input file is empty: %s' % (args.infile,))
    # Create the ELF map of the temporary file and edit the PHDR in place.
    with open(tmpfile, 'r+b') as fd:
      elf_map = mmap.mmap(fd.fileno(), 0)
      try:
        elf = Elf.LoadMap(elf_map)
        try:
          phdr = elf.GetPhdr(args.phdr_index)
        except IndexError as e:
          # Re-raise as Error so main() reports it instead of a traceback.
          raise Error('Invalid program header index %d: %s' %
                      (args.phdr_index, e))
        if phdr.p_type != Elf.PT_LOAD:
          raise Error('Invalid segment number; not PT_LOAD (%d)' % phdr.p_type)
        if phdr.p_filesz != 0:
          raise Error("Program header %d has nonzero p_filesz" %
                      args.phdr_index)
        phdr.p_filesz = phdr.p_memsz
        elf.SetPhdr(args.phdr_index, phdr)
        elf_map.flush()
      finally:
        # Release the mapping before the file is closed and moved.
        elf_map.close()
    # Move the munged temporary file to the output location.
    shutil.move(tmpfile, args.outfile)
    munged = True
  finally:
    # Do not leave a stale temporary file behind when anything went wrong.
    if not munged and os.path.exists(tmpfile):
      os.remove(tmpfile)
  return 0
230
231
def main(args):
  """Run RunMain and turn an Error into a message plus exit status.

  Args:
    args: Command-line arguments, without the program name.

  Returns:
    Whatever RunMain returns, or 1 after an Error has been written to
    stderr.  Exceptions of any other type propagate to the caller.
  """
  try:
    status = RunMain(args)
  except Error as err:
    report = ''.join(('nacl_bootstrap_munge_phdr: ', str(err), '\n'))
    sys.stderr.write(report)
    status = 1
  return status
238
239
# Script entry point: hand the arguments after the program name to main()
# and use its return value as the process exit status.
if __name__ == '__main__':
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)
242