1#=========================================================================
2# elf
3#=========================================================================
4# A simple translator between ELF files and a sparse memory image object.
5# Note that the translator is far from complete but is sufficient for use
6# in our research processors. I found this document pretty useful for
7# understanding the ELF32 format:
8#
9#  http://docs.oracle.com/cd/E19457-01/801-6737/801-6737.pdf
10#
11# Note that this implementation is inspired by the ELF object file reader
12# here:
13#
14#  http://www.tinyos.net/tinyos-1.x/tools/src/mspgcc-pybsl/elf.py
15#
16# which includes this copyright:
17#
18#  (C) 2003 cliechti@gmx.net
19#  Python license
20#
21# Shunning: I ported it to Python3 today and it is a bit interesting due
22# to the recent bytes/str disambiguation in Python3. Basically, there
23# are several changes worth noting:
24# - Currently file_obj.read() will return _bytes_ instead of str, so if
25#   we want to use the variable as string, we need to perform
26#   bytes.decode().
27# - struct.unpack only works for string/bytearray (I guess?) so I had to
28#   use bytearray(bytes_obj) to avoid any changes to struct.unpack and the
29#   format string.
30# - Whenever we write the elf file, we have to convert the strings back
31#   to bytes. Just keep in mind that for string purposes we need to decode
32#   and encode back.
33#
34# Author : Christopher Batten, Shunning Jiang
35# Date   : Feb 26, 2020
36
37import struct
38
39from .SparseMemoryImage import SparseMemoryImage
40
41#-------------------------------------------------------------------------
42# ELF File Format Types
43#-------------------------------------------------------------------------
44# These are the sizes for various ELF32 data types used in describing
45# various structures below.
46#
47#            size alignment
48# elf_addr   4    4  Unsigned program address
49# elf_half   2    2  Unsigned medium integer
50# elf_off    4    4  Unsigned file offset
51# elf_sword  4    4  Signed   large integer
52# elf_word   4    4  Unsigned large integer
53# elf_byte   1    1  Unsigned small integer
54
55#=========================================================================
56# ElfHeader
57#=========================================================================
58# Class encapsulating an ELF32 header which implements the following
59# C-structure.
60#
61# define EI_NIDENT 16
62# typedef struct {
63#   elf_byte e_ident[EI_NIDENT];
64#   elf_half e_type;
65#   elf_half e_machine;
66#   elf_word e_version;
67#   elf_addr e_entry;
68#   elf_off  e_phoff;
69#   elf_off  e_shoff;
70#   elf_word e_flags;
71#   elf_half e_ehsize;
72#   elf_half e_phentsize;
73#   elf_half e_phnum;
74#   elf_half e_shentsize;
75#   elf_half e_shnum;
76#   elf_half e_shstrndx;
77# } elf_ehdr;
78
class ElfHeader:
  """In-memory form of the ELF32 file header (the ``elf_ehdr`` struct).

  ``from_bytes`` fills in the attributes ``ident``, ``type``, ``machine``,
  ``version``, ``entry``, ``phoff``, ``shoff``, ``flags``, ``ehsize``,
  ``phentsize``, ``phnum``, ``shentsize``, ``shnum`` and ``shstrndx``;
  ``to_bytes`` packs those same attributes back into the little-endian
  layout described by ``FORMAT``. An instance created without data has
  none of these attributes until the caller assigns them.

  ``ident`` is held as a ``str`` with exactly one character per
  ``e_ident`` byte.
  """

  FORMAT = "<16sHHIIIIIHHHHHH"
  NBYTES = struct.calcsize( FORMAT )

  # e_ident is raw bytes, not text. latin-1 maps every byte value 0..255
  # to the code point with the same number, so arbitrary ident bytes
  # survive a from_bytes/to_bytes round trip (UTF-8 would reject or
  # re-encode bytes >= 0x80).

  _IDENT_CODEC = "latin-1"

  # Offsets within e_ident

  IDENT_NBYTES      = 16     # Size of e_ident[]
  IDENT_IDX_MAG0    = 0      # File identification
  IDENT_IDX_MAG1    = 1      # File identification
  IDENT_IDX_MAG2    = 2      # File identification
  IDENT_IDX_MAG3    = 3      # File identification
  IDENT_IDX_CLASS   = 4      # File class
  IDENT_IDX_DATA    = 5      # Data encoding
  IDENT_IDX_VERSION = 6      # File version
  IDENT_IDX_PAD     = 7      # Start of padding bytes

  # Elf file type flags

  TYPE_NONE         = 0      # No file type
  TYPE_REL          = 1      # Relocatable file
  TYPE_EXEC         = 2      # Executable file
  TYPE_DYN          = 3      # Shared object file
  TYPE_CORE         = 4      # Core file
  TYPE_LOPROC       = 0xff00 # Processor-specific
  TYPE_HIPROC       = 0xffff # Processor-specific

  #-----------------------------------------------------------------------
  # Constructor
  #-----------------------------------------------------------------------

  def __init__( self, data=None ):
    """Create a header, parsing ``data`` when it is given.

    Args:
      data: optional buffer of exactly ``NBYTES`` bytes holding a packed
        header. When omitted, no attributes are set.
    """
    if data is not None:
      self.from_bytes( data )

  #-----------------------------------------------------------------------
  # from_bytes
  #-----------------------------------------------------------------------

  def from_bytes( self, data ):
    """Populate all header attributes from a packed header.

    Args:
      data: buffer of exactly ``NBYTES`` bytes.

    Raises:
      struct.error: if ``data`` does not have exactly ``NBYTES`` bytes.
    """
    (
      raw_ident,
      self.type,
      self.machine,
      self.version,
      self.entry,
      self.phoff,
      self.shoff,
      self.flags,
      self.ehsize,
      self.phentsize,
      self.phnum,
      self.shentsize,
      self.shnum,
      self.shstrndx,
    ) = struct.unpack( ElfHeader.FORMAT, bytearray(data) )

    self.ident = raw_ident.decode( ElfHeader._IDENT_CODEC )

  #-----------------------------------------------------------------------
  # to_bytes
  #-----------------------------------------------------------------------

  def to_bytes( self ):
    """Return the header packed into ``NBYTES`` little-endian bytes.

    The "16s" field makes struct NUL-pad a short ``ident`` and truncate a
    long one to 16 bytes.
    """
    return struct.pack( ElfHeader.FORMAT,
      self.ident.encode( ElfHeader._IDENT_CODEC ),
      self.type,
      self.machine,
      self.version,
      self.entry,
      self.phoff,
      self.shoff,
      self.flags,
      self.ehsize,
      self.phentsize,
      self.phnum,
      self.shentsize,
      self.shnum,
      self.shstrndx,
    )

  #-----------------------------------------------------------------------
  # __str__
  #-----------------------------------------------------------------------

  def __str__( self ):
    """Return a multi-line dump; addresses, offsets and flags are in hex."""
    template = (
      "\n"
      " ElfHeader:\n"
      "   ident     = {},\n"
      "   type      = {},\n"
      "   machine   = {},\n"
      "   version   = {},\n"
      "   entry     = {},\n"
      "   phoff     = {},\n"
      "   shoff     = {},\n"
      "   flags     = {},\n"
      "   ehsize    = {},\n"
      "   phentsize = {},\n"
      "   phnum     = {},\n"
      "   shentsize = {},\n"
      "   shnum     = {},\n"
      "   shstrndx  = {}\n"
    )
    return template.format(
      self.ident,
      self.type,
      self.machine,
      self.version,
      hex(self.entry),
      hex(self.phoff),
      hex(self.shoff),
      hex(self.flags),
      self.ehsize,
      self.phentsize,
      self.phnum,
      self.shentsize,
      self.shnum,
      self.shstrndx,
    )
195
196#=========================================================================
197# ElfSectionHeader
198#=========================================================================
199# Class encapsulating an ELF32 section header which implements the
200# following C-structure.
201#
202# typedef struct {
203#   elf_word sh_name;
204#   elf_word sh_type;
205#   elf_word sh_flags;
206#   elf_addr sh_addr;
207#   elf_off  sh_offset;
208#   elf_word sh_size;
209#   elf_word sh_link;
210#   elf_word sh_info;
211#   elf_word sh_addralign;
212#   elf_word sh_entsize;
213# } elf_shdr;
214#
215
class ElfSectionHeader:
  """In-memory form of an ELF32 section header (the ``elf_shdr`` struct).

  ``from_bytes`` fills in the attributes ``name``, ``type``, ``flags``,
  ``addr``, ``offset``, ``size``, ``link``, ``info``, ``addralign`` and
  ``entsize`` from ten little-endian 32-bit words; ``to_bytes`` packs
  them back. ``name`` is the numeric ``sh_name`` word, not a string. An
  instance created without data has none of these attributes until the
  caller assigns them.
  """

  FORMAT = "<IIIIIIIIII"
  NBYTES = struct.calcsize( FORMAT )

  # Section types. Note that we only load some of these sections.

  TYPE_NULL        = 0
  TYPE_PROGBITS    = 1 # \
  TYPE_SYMTAB      = 2 # | We only load sections of these types
  TYPE_STRTAB      = 3 # /
  TYPE_RELA        = 4
  TYPE_HASH        = 5
  TYPE_DYNAMIC     = 6
  TYPE_NOTE        = 7
  TYPE_NOBITS      = 8
  TYPE_REL         = 9
  TYPE_SHLIB       = 10
  TYPE_DYNSYM      = 11
  TYPE_LOPROC      = 0x70000000
  TYPE_HIPROC      = 0x7fffffff
  TYPE_LOUSER      = 0x80000000
  TYPE_HIUSER      = 0xffffffff

  # Section attribute flags. Note that we only load sections with the
  # SHF_ALLOC flag set into the actual sparse memory.

  FLAGS_WRITE       = 0x1
  FLAGS_ALLOC       = 0x2
  FLAGS_EXECINSTR   = 0x4
  FLAGS_MASKPROC    = 0xf0000000

  #-----------------------------------------------------------------------
  # Constructor
  #-----------------------------------------------------------------------

  def __init__( self, data=None ):
    """Create a section header, parsing ``data`` when it is given.

    Args:
      data: optional buffer of exactly ``NBYTES`` bytes holding a packed
        section header. When omitted, no attributes are set.
    """
    # Identity test on purpose: "!= None" would call the buffer's own
    # comparison method instead of just asking "was an argument given".
    if data is not None:
      self.from_bytes( data )

  #-----------------------------------------------------------------------
  # from_bytes
  #-----------------------------------------------------------------------

  def from_bytes( self, data ):
    """Populate all section header attributes from a packed header.

    Args:
      data: buffer of exactly ``NBYTES`` bytes.

    Raises:
      struct.error: if ``data`` does not have exactly ``NBYTES`` bytes.
    """
    (
      self.name,
      self.type,
      self.flags,
      self.addr,
      self.offset,
      self.size,
      self.link,
      self.info,
      self.addralign,
      self.entsize,
    ) = struct.unpack( ElfSectionHeader.FORMAT, bytearray(data) )

  #-----------------------------------------------------------------------
  # to_bytes
  #-----------------------------------------------------------------------

  def to_bytes( self ):
    """Return the section header packed into ``NBYTES`` bytes."""
    return struct.pack( ElfSectionHeader.FORMAT,
      self.name,
      self.type,
      self.flags,
      self.addr,
      self.offset,
      self.size,
      self.link,
      self.info,
      self.addralign,
      self.entsize,
    )

  #-----------------------------------------------------------------------
  # __str__
  #-----------------------------------------------------------------------

  def __str__( self ):
    """Return a multi-line dump; flags, addr and offset are in hex."""
    template = (
      "\n"
      " ElfSectionHeader:\n"
      "   name      = {},\n"
      "   type      = {},\n"
      "   flags     = {},\n"
      "   addr      = {},\n"
      "   offset    = {},\n"
      "   size      = {},\n"
      "   link      = {},\n"
      "   info      = {},\n"
      "   addralign = {},\n"
      "   entsize   = {},\n"
    )
    return template.format(
      self.name,
      self.type,
      hex(self.flags),
      hex(self.addr),
      hex(self.offset),
      self.size,
      self.link,
      self.info,
      self.addralign,
      self.entsize,
    )
321
322#=========================================================================
323# ElfSymTabEntry
324#=========================================================================
325# Class encapsulating an ELF32 symbol table entry which implements the
326# following C-structure.
327#
328# typedef struct {
329#   elf_word st_name;
330#   elf_addr st_value;
331#   elf_word st_size;
332#   elf_byte st_info;
333#   elf_byte st_other;
334#   elf_half st_shndx;
335# } elf_sym;
336#
337
class ElfSymTabEntry:
  """In-memory form of an ELF32 symbol table entry (the ``elf_sym`` struct).

  ``from_bytes`` fills in the attributes ``name``, ``value``, ``size``,
  ``info``, ``other`` and ``shndx``; ``to_bytes`` packs them back into
  the little-endian layout described by ``FORMAT``. ``name`` is the
  numeric ``st_name`` word, not a string. An instance created without
  data has none of these attributes until the caller assigns them.
  """

  FORMAT = "<IIIBBH"
  NBYTES = struct.calcsize( FORMAT )

  # Symbol types. Note we only load some of these types.

  TYPE_NOTYPE  = 0 # \
  TYPE_OBJECT  = 1 # | We only load symbols of these types
  TYPE_FUNC    = 2 # /
  TYPE_SECTION = 3
  TYPE_FILE    = 4
  TYPE_LOPROC  = 13
  TYPE_HIPROC  = 15

  #-----------------------------------------------------------------------
  # Constructor
  #-----------------------------------------------------------------------

  def __init__( self, data=None ):
    """Create a symbol entry, parsing ``data`` when it is given.

    Args:
      data: optional buffer of exactly ``NBYTES`` bytes holding a packed
        symbol table entry. When omitted, no attributes are set.
    """
    if data is not None:
      self.from_bytes( data )

  #-----------------------------------------------------------------------
  # from_bytes
  #-----------------------------------------------------------------------

  def from_bytes( self, data ):
    """Populate all symbol attributes from a packed entry.

    Args:
      data: buffer of exactly ``NBYTES`` bytes.

    Raises:
      struct.error: if ``data`` does not have exactly ``NBYTES`` bytes.
    """
    (
      self.name,
      self.value,
      self.size,
      self.info,
      self.other,
      self.shndx,
    ) = struct.unpack( ElfSymTabEntry.FORMAT, bytearray(data) )

  #-----------------------------------------------------------------------
  # to_bytes
  #-----------------------------------------------------------------------

  def to_bytes( self ):
    """Return the symbol entry packed into ``NBYTES`` bytes."""
    return struct.pack( ElfSymTabEntry.FORMAT,
      self.name,
      self.value,
      self.size,
      self.info,
      self.other,
      self.shndx,
    )

  #-----------------------------------------------------------------------
  # __str__
  #-----------------------------------------------------------------------

  def __str__( self ):
    """Return a multi-line dump of the entry; ``value`` is shown in hex."""
    # The first line prints self.name, so it is labelled "name" (this
    # struct has no ident field).
    template = (
      "\n"
      " ElfSymTabEntry:\n"
      "   name      = {}\n"
      "   value     = {}\n"
      "   size      = {}\n"
      "   info      = {}\n"
      "   other     = {}\n"
      "   shndx     = {}\n"
    )
    return template.format(
      self.name,
      hex(self.value),
      self.size,
      self.info,
      self.other,
      self.shndx,
    )
410
411#-------------------------------------------------------------------------
412# elf_reader
413#-------------------------------------------------------------------------
414# Opens and parses an ELF file into a sparse memory image object.
415
def elf_reader( file_obj ):
  """Parse an ELF32 file into a SparseMemoryImage.

  Only sections with the ALLOC flag are considered. Of those, symbol
  table and string table sections are held aside (currently unused) and
  every other section is added to the image under its name and address.
  Zero-initialized sections get a zero-filled buffer of the declared
  size instead of bytes read from the file.

  Args:
    file_obj: seekable binary file object. The ELF header is read from
      the current position; everything else is reached with absolute
      seeks.

  Returns:
    A SparseMemoryImage holding one Section per loaded section.

  Raises:
    ValueError: if the data is too short to hold an ELF header or does
      not start with the ELF magic number.
  """

  # Read the ELF header and check the magic on the raw bytes before
  # parsing, so truncated or non-ELF input is reported as a ValueError
  # rather than as a struct or decoding error from the header parser.

  ehdr_data = file_obj.read( ElfHeader.NBYTES )

  if len(ehdr_data) < ElfHeader.NBYTES or ehdr_data[0:4] != b'\x7fELF':
    raise ValueError( "Not a valid ELF file" )

  ehdr = ElfHeader( ehdr_data )

  # Helper that reads section header number idx. The raw entry is padded
  # with NULs if the file's shentsize is smaller than our structure and
  # truncated if it is larger, because unpack needs exactly NBYTES bytes.

  def read_section_header( idx ):
    file_obj.seek( ehdr.shoff + idx * ehdr.shentsize )
    raw = file_obj.read( ehdr.shentsize )
    raw = raw[:ElfSectionHeader.NBYTES].ljust( ElfSectionHeader.NBYTES, b'\0' )
    return ElfSectionHeader( raw )

  # We need the section string table to figure out the name of each
  # section. Its section header is entry shstrndx of the header table.
  # The table is kept as bytes because sh_name values are byte offsets;
  # each name is decoded only after it has been sliced out.

  shstrtab_shdr = read_section_header( ehdr.shstrndx )
  file_obj.seek( shstrtab_shdr.offset )
  shstrtab_data = file_obj.read( shstrtab_shdr.size )

  # Load sections

  symtab_data = None
  strtab_data = None

  mem_image = SparseMemoryImage()

  for section_idx in range(ehdr.shnum):

    shdr = read_section_header( section_idx )

    # Only sections marked as alloc should be written to memory

    if not ( shdr.flags & ElfSectionHeader.FLAGS_ALLOC ):
      continue

    # Find the section name: the NUL-terminated string starting at byte
    # offset shdr.name (or the rest of the table if no NUL follows).

    name_end = shstrtab_data.find( b'\0', shdr.name )
    if name_end < 0:
      name_end = len(shstrtab_data)
    section_name = shstrtab_data[shdr.name:name_end].decode()

    # NOTE: the .bss and .sbss sections don't actually contain any data
    # in the ELF. These sections should be initialized to zero. The same
    # holds for any section of type NOBITS, whatever it is called. The
    # name check is kept because elf_writer emits every section as
    # PROGBITS. For more information see:
    #
    # - http://stackoverflow.com/questions/610682/bss-section-in-elf-file

    zero_filled = ( shdr.type == ElfSectionHeader.TYPE_NOBITS
                    or section_name in ( '.sbss', '.bss' ) )

    if zero_filled:
      data = b'\0' * shdr.size
    else:
      file_obj.seek( shdr.offset )
      data = file_obj.read( shdr.size )

    # Save the data holding the symbol string table

    if shdr.type == ElfSectionHeader.TYPE_STRTAB:
      strtab_data = data

    # Save the data holding the symbol table

    elif shdr.type == ElfSectionHeader.TYPE_SYMTAB:
      symtab_data = data

    # Otherwise create section and append it to our list of sections

    else:
      section = SparseMemoryImage.Section( section_name, shdr.addr, data )
      mem_image.add_section( section )

  # Load symbols. We skip the first symbol since it both "designates the
  # first entry in the table and serves as the undefined symbol index".
  # For now, I have commented this out, since we are not really using it.
  #
  # NOTE(review): symtab_data/strtab_data are only captured for sections
  # that passed the ALLOC filter above; presumably .symtab/.strtab are
  # usually not ALLOC, in which case both stay None -- confirm before
  # re-enabling this code.

  # num_symbols = len(symtab_data) // ElfSymTabEntry.NBYTES
  # for sym_idx in range(1,num_symbols):
  #
  #   # Read the data for a symbol table entry
  #
  #   start = sym_idx * ElfSymTabEntry.NBYTES
  #   sym_data = symtab_data[start:start+ElfSymTabEntry.NBYTES]
  #
  #   # Construct a symbol table entry
  #
  #   sym = ElfSymTabEntry( sym_data )
  #
  #   # Get the symbol type
  #
  #   sym_type  = sym.info & 0xf
  #
  #   # Check to see if symbol is one of the three types we want to load
  #
  #   valid_sym_types = \
  #   [
  #     ElfSymTabEntry.TYPE_NOTYPE,
  #     ElfSymTabEntry.TYPE_OBJECT,
  #     ElfSymTabEntry.TYPE_FUNC,
  #   ]
  #
  #   if sym_type not in valid_sym_types:
  #     continue
  #
  #   # Get the symbol name from the string table (bytes, so decode it)
  #
  #   start = strtab_data[sym.name:]
  #   name = start.partition(b'\0')[0].decode()
  #
  #   # Add symbol to the sparse memory image
  #
  #   mem_image.add_symbol( name, sym.value )

  return mem_image
556
557#-------------------------------------------------------------------------
558# elf_writer
559#-------------------------------------------------------------------------
560# Writes a sparse memory image object to an ELF file. Currently we write
561# the ELF file in the following order:
562#
563#  - ElfHeader
564#  - ElfSectionHeader for "null" section
565#  - ElfSectionHeader for all "normal" sections
566#  - ElfSectionHeader for ".shstrtab" section
567#  - data for all "normal" sections
568#  - data for ".shstrtab" section
569#
570
def elf_writer( mem_image, file_obj ):
  """Write a sparse memory image to file_obj as a minimal ELF32 file.

  The file is laid out as: ELF header, "null" section header, one
  section header per image section, ".shstrtab" section header, the data
  of every image section, and finally the ".shstrtab" name table. No
  program headers are written. Every image section is emitted as a
  PROGBITS section with only the ALLOC flag set.

  Args:
    mem_image: object whose get_sections() returns the sections to write.
      Each section must provide name (a str), addr, and data (a
      bytes-like object supporting len()).
    file_obj: binary file object; only its write() method is used.
  """

  # Get the sections

  sections = mem_image.get_sections()

  # We have two extra sections beyond the normal sections: the first
  # "null" section and the final ".shstrtab" section.

  num_shdrs = len(sections) + 2

  # Many of these fields are just copied from what binutils generates.

  ehdr = ElfHeader()

  # The unused tail of e_ident is padded with NUL bytes (not the digit
  # character '0'), and ehsize records the real size of this header.

  ehdr.ident     = "\x7fELF\x01\x01\x01".ljust( ElfHeader.IDENT_NBYTES, '\0' )
  ehdr.type      = ElfHeader.TYPE_EXEC
  ehdr.machine   = 8
  ehdr.version   = 1
  ehdr.entry     = 0x00001000
  ehdr.phoff     = 0
  ehdr.shoff     = ElfHeader.NBYTES         # shdrs right after ehdr
  ehdr.flags     = 0x70b03000
  ehdr.ehsize    = ElfHeader.NBYTES
  ehdr.phentsize = 0
  ehdr.phnum     = 0
  ehdr.shentsize = ElfSectionHeader.NBYTES  # shdrs are fixed size
  ehdr.shnum     = num_shdrs
  ehdr.shstrndx  = num_shdrs - 1            # shstrtab is the last shdr

  # Helper that packs one section header. link, info, addralign and
  # entsize are zero for every header this writer produces.

  def pack_shdr( name, sh_type, flags, addr, offset, size ):
    shdr = ElfSectionHeader()
    shdr.name      = name
    shdr.type      = sh_type
    shdr.flags     = flags
    shdr.addr      = addr
    shdr.offset    = offset
    shdr.size      = size
    shdr.link      = 0
    shdr.info      = 0
    shdr.addralign = 0
    shdr.entsize   = 0
    return shdr.to_bytes()

  # The section data is going to start after the ELF header and all of
  # the section headers.

  data_offset = ElfHeader.NBYTES + num_shdrs * ElfSectionHeader.NBYTES

  # Section names are collected as bytes so that each name offset is a
  # byte offset into the table exactly as it is written to the file. The
  # table starts with a NUL so that offset 0 is the empty name.

  name_table = bytearray( b'\0' )

  # Build the "null" section header followed by the "normal" ones

  packed_shdrs = [ pack_shdr( 0, 0, 0, 0, 0, 0 ) ]

  for section in sections:

    packed_shdrs.append( pack_shdr(
      len(name_table),
      ElfSectionHeader.TYPE_PROGBITS,
      ElfSectionHeader.FLAGS_ALLOC,
      section.addr,
      data_offset,
      len(section.data),
    ))

    name_table  += section.name.encode() + b'\0'
    data_offset += len(section.data)

  # Build the ".shstrtab" section header. Its own name is the last entry
  # of the table, and its size covers the complete table.

  shstrtab_name_offset = len(name_table)
  name_table += b'.shstrtab\0'

  packed_shdrs.append( pack_shdr(
    shstrtab_name_offset,
    ElfSectionHeader.TYPE_STRTAB,
    0,
    0,
    data_offset,
    len(name_table),
  ))

  # Write the ELF header followed by all of the section headers

  file_obj.write( ehdr.to_bytes() )
  file_obj.write( b''.join( packed_shdrs ) )

  # Write the section data for "normal" sections

  for section in sections:
    file_obj.write( section.data )

  # Write the data for the ".shstrtab" section

  file_obj.write( bytes(name_table) )
677