1#========================================================================= 2# elf 3#========================================================================= 4# A simple translator between ELF files and a sparse memory image object. 5# Note that the translator is far from complete but is sufficient for use 6# in our research processors. I found this document pretty useful for 7# understanding the ELF32 format: 8# 9# http://docs.oracle.com/cd/E19457-01/801-6737/801-6737.pdf 10# 11# Note that this implementation is inspired by the ELF object file reader 12# here: 13# 14# http://www.tinyos.net/tinyos-1.x/tools/src/mspgcc-pybsl/elf.py 15# 16# which includes this copyright: 17# 18# (C) 2003 cliechti@gmx.net 19# Python license 20# 21# Shunning: I ported it to Python3 today and it is a bit interesting due 22# to the recent bytes/str disambiguation in Python3. Basically, there 23# are several changes worth noting: 24# - Currently file_obj.read() will return _bytes_ instead of str, so if 25# we want to use the variable as string, we need to perform 26# bytes.decode(). 27# - struct.unpack only works for string/bytearray (I guess?) so I had to 28# use bytearray(bytes_obj) to avoid any changes to struct.unpack and the 29# format string. 30# - Whenever we write the elf file, we have to convert the strings back 31# to bytes. Just keep in mind that for string purposes we need to decode 32# and encode back. 33# 34# Author : Christopher Batten, Shunning Jiang 35# Date : Feb 26, 2020 36 37import struct 38 39from .SparseMemoryImage import SparseMemoryImage 40 41#------------------------------------------------------------------------- 42# ELF File Format Types 43#------------------------------------------------------------------------- 44# These are the sizes for various ELF32 data types used in describing 45# various structures below. 
#
#                   size alignment
#   elf_addr        4    4         Unsigned program address
#   elf_half        2    2         Unsigned medium integer
#   elf_off         4    4         Unsigned file offset
#   elf_sword       4    4         Signed large integer
#   elf_word        4    4         Unsigned large integer
#   elf_byte        1    1         Unsigned small integer

#=========================================================================
# ElfHeader
#=========================================================================
# Class encapsulating an ELF32 header which implements the following
# C-structure.
#
#  define EI_NIDENT 16
#  typedef struct {
#    elf_byte e_ident[EI_NIDENT];
#    elf_half e_type;
#    elf_half e_machine;
#    elf_word e_version;
#    elf_addr e_entry;
#    elf_off  e_phoff;
#    elf_off  e_shoff;
#    elf_word e_flags;
#    elf_half e_ehsize;
#    elf_half e_phentsize;
#    elf_half e_phnum;
#    elf_half e_shentsize;
#    elf_half e_shnum;
#    elf_half e_shstrndx;
#  } elf_ehdr;

class ElfHeader:
    """Little-endian ELF32 file header.

    An instance created without ``data`` has no fields set; the caller is
    expected to assign every attribute (``ident`` through ``shstrndx``)
    before calling :meth:`to_bytes`.

    ``ident`` is kept as a ``str`` in which every character stands for
    exactly one byte of ``e_ident`` (latin-1 mapping), so identification
    bytes >= 0x80 survive a read/write round trip unchanged.
    """

    FORMAT = "<16sHHIIIIIHHHHHH"
    NBYTES = struct.calcsize( FORMAT )

    # Codec used for e_ident. latin-1 maps bytes 0x00-0xff to code points
    # one-to-one, so it never fails and never changes the byte count
    # (the default UTF-8 codec does both for bytes >= 0x80).

    IDENT_ENCODING = "latin-1"

    # Offsets within e_ident

    IDENT_NBYTES      = 16 # Size of e_ident[]
    IDENT_IDX_MAG0    = 0  # File identification
    IDENT_IDX_MAG1    = 1  # File identification
    IDENT_IDX_MAG2    = 2  # File identification
    IDENT_IDX_MAG3    = 3  # File identification
    IDENT_IDX_CLASS   = 4  # File class
    IDENT_IDX_DATA    = 5  # Data encoding
    IDENT_IDX_VERSION = 6  # File version
    IDENT_IDX_PAD     = 7  # Start of padding bytes

    # Elf file type flags

    TYPE_NONE   = 0      # No file type
    TYPE_REL    = 1      # Relocatable file
    TYPE_EXEC   = 2      # Executable file
    TYPE_DYN    = 3      # Shared object file
    TYPE_CORE   = 4      # Core file
    TYPE_LOPROC = 0xff00 # Processor-specific
    TYPE_HIPROC = 0xffff # Processor-specific

    #-----------------------------------------------------------------------
    # Constructor
    #-----------------------------------------------------------------------

    def __init__( self, data=None ):
        """Optionally parse ``data`` (exactly NBYTES bytes) into the fields."""
        if data is not None:
            self.from_bytes( data )

    #-----------------------------------------------------------------------
    # from_bytes
    #-----------------------------------------------------------------------

    def from_bytes( self, data ):
        """Populate all header fields from a packed ELF32 header.

        ``data`` must be exactly NBYTES long; struct.unpack raises
        struct.error otherwise.
        """
        (
            raw_ident,
            self.type,
            self.machine,
            self.version,
            self.entry,
            self.phoff,
            self.shoff,
            self.flags,
            self.ehsize,
            self.phentsize,
            self.phnum,
            self.shentsize,
            self.shnum,
            self.shstrndx,
        ) = struct.unpack( ElfHeader.FORMAT, bytearray(data) )

        # One character per identification byte, whatever its value.

        self.ident = raw_ident.decode( ElfHeader.IDENT_ENCODING )

    #-----------------------------------------------------------------------
    # to_bytes
    #-----------------------------------------------------------------------

    def to_bytes( self ):
        """Return the header packed into NBYTES little-endian bytes."""
        return struct.pack( ElfHeader.FORMAT,
            self.ident.encode( ElfHeader.IDENT_ENCODING ),
            self.type,
            self.machine,
            self.version,
            self.entry,
            self.phoff,
            self.shoff,
            self.flags,
            self.ehsize,
            self.phentsize,
            self.phnum,
            self.shentsize,
            self.shnum,
            self.shstrndx,
        )

    #-----------------------------------------------------------------------
    # __str__
    #-----------------------------------------------------------------------

    def __str__( self ):
        """Multi-line dump of every field; addresses/offsets/flags in hex."""
        return \
"""
 ElfHeader:
   ident     = {},
   type      = {},
   machine   = {},
   version   = {},
   entry     = {},
   phoff     = {},
   shoff     = {},
   flags     = {},
   ehsize    = {},
   phentsize = {},
   phnum     = {},
   shentsize = {},
   shnum     = {},
   shstrndx  = {}
""".format(
            self.ident,
            self.type,
            self.machine,
            self.version,
            hex(self.entry),
            hex(self.phoff),
            hex(self.shoff),
            hex(self.flags),
            self.ehsize,
            self.phentsize,
            self.phnum,
            self.shentsize,
            self.shnum,
            self.shstrndx,
        )

#=========================================================================
# ElfSectionHeader
#=========================================================================
# Class encapsulating an ELF32 section header which implements the
# following C-structure.
#
#  typedef struct {
#    elf_word sh_name;
#    elf_word sh_type;
#    elf_word sh_flags;
#    elf_addr sh_addr;
#    elf_off  sh_offset;
#    elf_word sh_size;
#    elf_word sh_link;
#    elf_word sh_info;
#    elf_word sh_addralign;
#    elf_word sh_entsize;
#  } elf_shdr;
#

class ElfSectionHeader:
    """Little-endian ELF32 section header (ten 32-bit unsigned words).

    An instance created without ``data`` has no fields set; the caller is
    expected to assign every attribute (``name`` through ``entsize``)
    before calling :meth:`to_bytes`. ``name`` is the numeric ``sh_name``
    value, not the section's textual name.
    """

    FORMAT = "<IIIIIIIIII"
    NBYTES = struct.calcsize( FORMAT )

    # Section types. Note that we only load some of these sections.

    TYPE_NULL     = 0
    TYPE_PROGBITS = 1 # \
    TYPE_SYMTAB   = 2 # | We only load sections of these types
    TYPE_STRTAB   = 3 # /
    TYPE_RELA     = 4
    TYPE_HASH     = 5
    TYPE_DYNAMIC  = 6
    TYPE_NOTE     = 7
    TYPE_NOBITS   = 8
    TYPE_REL      = 9
    TYPE_SHLIB    = 10
    TYPE_DYNSYM   = 11
    TYPE_LOPROC   = 0x70000000
    TYPE_HIPROC   = 0x7fffffff
    TYPE_LOUSER   = 0x80000000
    TYPE_HIUSER   = 0xffffffff

    # Section attribute flags. Note that we only load sections with the
    # SHF_ALLOC flag set into the actual sparse memory.

    FLAGS_WRITE     = 0x1
    FLAGS_ALLOC     = 0x2
    FLAGS_EXECINSTR = 0x4
    FLAGS_MASKPROC  = 0xf0000000

    #-----------------------------------------------------------------------
    # Constructor
    #-----------------------------------------------------------------------

    def __init__( self, data=None ):
        """Optionally parse ``data`` (exactly NBYTES bytes) into the fields."""
        # Identity test: an equality test against None would defer to the
        # buffer type's own comparison operators.
        if data is not None:
            self.from_bytes( data )

    #-----------------------------------------------------------------------
    # from_bytes
    #-----------------------------------------------------------------------

    def from_bytes( self, data ):
        """Populate all fields from a packed ELF32 section header.

        ``data`` must be exactly NBYTES long; struct.unpack raises
        struct.error otherwise.
        """
        (
            self.name,
            self.type,
            self.flags,
            self.addr,
            self.offset,
            self.size,
            self.link,
            self.info,
            self.addralign,
            self.entsize,
        ) = struct.unpack( ElfSectionHeader.FORMAT, bytearray(data) )

    #-----------------------------------------------------------------------
    # to_bytes
    #-----------------------------------------------------------------------

    def to_bytes( self ):
        """Return the section header packed into NBYTES little-endian bytes."""
        return struct.pack( ElfSectionHeader.FORMAT,
            self.name,
            self.type,
            self.flags,
            self.addr,
            self.offset,
            self.size,
            self.link,
            self.info,
            self.addralign,
            self.entsize,
        )

    #-----------------------------------------------------------------------
    # __str__
    #-----------------------------------------------------------------------

    def __str__( self ):
        """Multi-line dump of every field; flags/addr/offset in hex."""
        return \
"""
 ElfSectionHeader:
   name      = {},
   type      = {},
   flags     = {},
   addr      = {},
   offset    = {},
   size      = {},
   link      = {},
   info      = {},
   addralign = {},
   entsize   = {},
""".format(
            self.name,
            self.type,
            hex(self.flags),
            hex(self.addr),
            hex(self.offset),
            self.size,
            self.link,
            self.info,
            self.addralign,
            self.entsize,
        )

#=========================================================================
# ElfSymTabEntry
#=========================================================================
# Class encapsulating an ELF32 symbol table entry which implements the
# following C-structure.
#
#  typedef struct {
#    elf_word st_name;
#    elf_addr st_value;
#    elf_word st_size;
#    elf_byte st_info;
#    elf_byte st_other;
#    elf_half st_shndx;
#  } elf_sym;
#

class ElfSymTabEntry:
    """Little-endian ELF32 symbol table entry.

    An instance created without ``data`` has no fields set; the caller is
    expected to assign every attribute (``name`` through ``shndx``) before
    calling :meth:`to_bytes`. ``name`` is the numeric ``st_name`` value,
    not the symbol's textual name.
    """

    FORMAT = "<IIIBBH"
    NBYTES = struct.calcsize( FORMAT )

    # Symbol types. Note we only load some of these types.

    TYPE_NOTYPE  = 0 # \
    TYPE_OBJECT  = 1 # | We only load symbols of these types
    TYPE_FUNC    = 2 # /
    TYPE_SECTION = 3
    TYPE_FILE    = 4
    TYPE_LOPROC  = 13
    TYPE_HIPROC  = 15

    #-----------------------------------------------------------------------
    # Constructor
    #-----------------------------------------------------------------------

    def __init__( self, data=None ):
        """Optionally parse ``data`` (exactly NBYTES bytes) into the fields."""
        if data is not None:
            self.from_bytes( data )

    #-----------------------------------------------------------------------
    # from_bytes
    #-----------------------------------------------------------------------

    def from_bytes( self, data ):
        """Populate all fields from a packed ELF32 symbol table entry.

        ``data`` must be exactly NBYTES long; struct.unpack raises
        struct.error otherwise.
        """
        (
            self.name,
            self.value,
            self.size,
            self.info,
            self.other,
            self.shndx,
        ) = struct.unpack( ElfSymTabEntry.FORMAT, bytearray(data) )

    #-----------------------------------------------------------------------
    # to_bytes
    #-----------------------------------------------------------------------

    def to_bytes( self ):
        """Return the entry packed into NBYTES little-endian bytes."""
        return struct.pack( ElfSymTabEntry.FORMAT,
            self.name,
            self.value,
            self.size,
            self.info,
            self.other,
            self.shndx,
        )

    #-----------------------------------------------------------------------
    # __str__
    #-----------------------------------------------------------------------

    def __str__( self ):
        """Multi-line dump of every field; the value is shown in hex."""
        # The first row prints self.name, so it is labelled "name" (it
        # was previously mislabelled "ident").
        return \
"""
 ElfSymTabEntry:
   name      = {}
   value     = {}
   size      = {}
   info      = {}
   other     = {}
   shndx     = {}
""".format(
            self.name,
            hex(self.value),
            self.size,
            self.info,
            self.other,
            self.shndx,
        )

#-------------------------------------------------------------------------
# _read_section_header
#-------------------------------------------------------------------------
# Reads section header number `index` from the section header table
# described by `ehdr` and returns it as an ElfSectionHeader.

def _read_section_header( file_obj, ehdr, index ):

    file_obj.seek( ehdr.shoff + index * ehdr.shentsize )
    shdr_data = file_obj.read( ehdr.shentsize )

    # Pad the returned bytes in case the section header is not long
    # enough (otherwise the unpack function would not work)

    shdr_data = shdr_data.ljust( ElfSectionHeader.NBYTES, b'\0' )

    return ElfSectionHeader( shdr_data )

#-------------------------------------------------------------------------
# _section_name
#-------------------------------------------------------------------------
# Returns the NUL-terminated string starting at byte `offset` of the raw
# section string table. An offset past the end yields an empty name.

def _section_name( shstrtab_data, offset ):

    end = shstrtab_data.find( b'\0', offset )
    if end < 0:
        end = len( shstrtab_data )

    # sh_name is a byte offset, so slice the raw bytes first and decode
    # only the name itself.

    return shstrtab_data[offset:end].decode()

#-------------------------------------------------------------------------
# elf_reader
#-------------------------------------------------------------------------
# Opens and parses an ELF file into a sparse memory image object.

def elf_reader( file_obj ):
    """Parse an ELF32 file into a SparseMemoryImage.

    ``file_obj`` must be a seekable binary file object. Only sections
    with the ALLOC flag are considered; of those, string-table and
    symbol-table sections are set aside and every other section is added
    to the returned image under its name and load address.

    Raises ValueError if the file is too short to hold an ELF header or
    does not start with the ELF magic number.
    """

    # Read the data for the ELF header

    ehdr_data = file_obj.read( ElfHeader.NBYTES )

    # Verify that this really is an ELF file before unpacking anything, so
    # that empty, truncated and arbitrary binary files are all reported
    # the same way.

    if len( ehdr_data ) < ElfHeader.NBYTES or ehdr_data[0:4] != b'\x7fELF':
        raise ValueError( "Not a valid ELF file" )

    # Construct an ELF header object

    ehdr = ElfHeader( ehdr_data )

    # We need to find the section string table so we can figure out the
    # name of each section. We know that the section header for the section
    # string table is entry shstrndx, so we first get this section header.

    shstrtab_shdr = _read_section_header( file_obj, ehdr, ehdr.shstrndx )

    # Read the raw data of the section string table

    file_obj.seek( shstrtab_shdr.offset )
    shstrtab_data = file_obj.read( shstrtab_shdr.size )

    # Load sections. The two *_data variables are only consumed by the
    # symbol loading code below, which is currently disabled.

    symtab_data = None
    strtab_data = None

    mem_image = SparseMemoryImage()

    for section_idx in range( ehdr.shnum ):

        shdr = _read_section_header( file_obj, ehdr, section_idx )
        section_name = _section_name( shstrtab_data, shdr.name )

        # Only sections marked as alloc should be written to memory

        if not ( shdr.flags & ElfSectionHeader.FLAGS_ALLOC ):
            continue

        # NOTE: the .bss and .sbss sections don't actually contain any
        # data in the ELF. These sections should be initialized to zero.
        # The same holds for every section of type NOBITS, whatever its
        # name. For more information see:
        #
        # - http://stackoverflow.com/questions/610682/bss-section-in-elf-file

        occupies_no_file_space = (
            shdr.type == ElfSectionHeader.TYPE_NOBITS
            or section_name in ( '.sbss', '.bss' )
        )

        if occupies_no_file_space:
            data = b'\0' * shdr.size

        # Read the section data if it exists

        else:
            file_obj.seek( shdr.offset )
            data = file_obj.read( shdr.size )

        # Save the data holding the symbol string table

        if shdr.type == ElfSectionHeader.TYPE_STRTAB:
            strtab_data = data

        # Save the data holding the symbol table

        elif shdr.type == ElfSectionHeader.TYPE_SYMTAB:
            symtab_data = data

        # Otherwise create section and append it to our list of sections

        else:
            section = SparseMemoryImage.Section( section_name, shdr.addr, data )
            mem_image.add_section( section )

    # Load symbols. We skip the first symbol since it both "designates the
    # first entry in the table and serves as the undefined symbol index".
    # For now, I have commented this out, since we are not really using it.
    # NOTE: the disabled code below still uses xrange, "/" for the symbol
    # count and str partition on the string table, so it needs porting
    # before it can be re-enabled under Python 3.

    # num_symbols = len(symtab_data) / ElfSymTabEntry.NBYTES
    # for sym_idx in xrange(1,num_symbols):
    #
    #   # Read the data for a symbol table entry
    #
    #   start = sym_idx * ElfSymTabEntry.NBYTES
    #   sym_data = symtab_data[start:start+ElfSymTabEntry.NBYTES]
    #
    #   # Construct a symbol table entry
    #
    #   sym = ElfSymTabEntry( sym_data )
    #
    #   # Get the symbol type
    #
    #   sym_type = sym.info & 0xf
    #
    #   # Check to see if symbol is one of the three types we want to load
    #
    #   valid_sym_types = \
    #   [
    #     ElfSymTabEntry.TYPE_NOTYPE,
    #     ElfSymTabEntry.TYPE_OBJECT,
    #     ElfSymTabEntry.TYPE_FUNC,
    #   ]
    #
    #   # Check to see if symbol is one of the three types we want to load
    #
    #   if sym_type not in valid_sym_types:
    #     continue
    #
    #   # Get the symbol name from the string table
    #
    #   start = strtab_data[sym.name:]
    #   name = start.partition('\0')[0]
    #
    #   # Add symbol to the sparse memory image
    #
    #   mem_image.add_symbol( name, sym.value )

    return mem_image

#-------------------------------------------------------------------------
# _build_section_header
#-------------------------------------------------------------------------
# Returns an ElfSectionHeader with the given fields; link, info,
# addralign and entsize are always zero in the files we write.

def _build_section_header( name, type_, flags, addr, offset, size ):

    shdr = ElfSectionHeader()
    shdr.name      = name
    shdr.type      = type_
    shdr.flags     = flags
    shdr.addr      = addr
    shdr.offset    = offset
    shdr.size      = size
    shdr.link      = 0
    shdr.info      = 0
    shdr.addralign = 0
    shdr.entsize   = 0
    return shdr

#-------------------------------------------------------------------------
# elf_writer
#-------------------------------------------------------------------------
# Writes a sparse memory image object to an ELF file. Currently we write
# the ELF file in the following order:
#
#  - ElfHeader
#  - ElfSectionHeader for "null" section
#  - ElfSectionHeader for all "normal" sections
#  - ElfSectionHeader for ".shstrtab" section
#  - data for all "normal" sections
#  - data for ".shstrtab" section
#

def elf_writer( mem_image, file_obj ):
    """Write every section of ``mem_image`` to ``file_obj`` as an ELF32 file.

    ``file_obj`` must be a binary file object positioned where the ELF
    file should start. Each section of the image becomes a PROGBITS
    section with the ALLOC flag; a leading null section header and a
    trailing ".shstrtab" section are added. No program headers or symbols
    are written.
    """

    # Get the sections

    sections = mem_image.get_sections()
    num_sections = len( sections )

    # Build the ".shstrtab" contents up front, as bytes, so that the name
    # offsets and the section size recorded in the headers are measured in
    # the same units as the data that is actually written.

    shstrtab = bytearray( b"\0" )
    name_offsets = []

    for section in sections:
        name_offsets.append( len( shstrtab ) )
        shstrtab += section.name.encode() + b"\0"

    shstrtab_name_offset = len( shstrtab )
    shstrtab += b".shstrtab\0"

    # Many of these fields are just copied from what binutils generates.
    # Note that we have two extra sections beyond the normal sections. The
    # first "null" section and the final ".shstrtab" section.

    ehdr = ElfHeader()

    # Magic number followed by class, data encoding and version bytes
    # (see the IDENT_IDX_* offsets); the remaining bytes are NUL padding.

    ehdr.ident     = "\x7fELF\x01\x01\x01".ljust( ElfHeader.IDENT_NBYTES, '\0' )
    ehdr.type      = ElfHeader.TYPE_EXEC
    ehdr.machine   = 8
    ehdr.version   = 1
    ehdr.entry     = 0x00001000
    ehdr.phoff     = 0
    ehdr.shoff     = ElfHeader.NBYTES         # shdrs right after ehdr
    ehdr.flags     = 0x70b03000
    ehdr.ehsize    = ElfHeader.NBYTES         # size of this header
    ehdr.phentsize = 0
    ehdr.phnum     = 0
    ehdr.shentsize = ElfSectionHeader.NBYTES  # shdrs are fixed size
    ehdr.shnum     = num_sections + 2         # add 2 for extra sections
    ehdr.shstrndx  = num_sections + 1         # location of shstrtab

    # Write the ELF header to the file

    file_obj.write( ehdr.to_bytes() )

    # Write the first "null" section header to the file

    null_shdr = _build_section_header(
        0, ElfSectionHeader.TYPE_NULL, 0, 0, 0, 0 )
    file_obj.write( null_shdr.to_bytes() )

    # The section data is going to start after the ELF header and all of
    # the section headers (null + normal + shstrtab).

    section_offset = \
        ElfHeader.NBYTES + ( num_sections + 2 ) * ElfSectionHeader.NBYTES

    # Write the "normal" section headers to the file

    for section, name_offset in zip( sections, name_offsets ):

        shdr = _build_section_header(
            name_offset,
            ElfSectionHeader.TYPE_PROGBITS,
            ElfSectionHeader.FLAGS_ALLOC,
            section.addr,
            section_offset,
            len( section.data ),
        )
        file_obj.write( shdr.to_bytes() )

        section_offset += len( section.data )

    # Write the ".shstrtab" section header to the file

    shstrtab_shdr = _build_section_header(
        shstrtab_name_offset,
        ElfSectionHeader.TYPE_STRTAB,
        0,
        0,
        section_offset,
        len( shstrtab ),
    )
    file_obj.write( shstrtab_shdr.to_bytes() )

    # Write the section data for "normal" sections

    for section in sections:
        file_obj.write( section.data )

    # Write the data for the ".shstrtab" section

    file_obj.write( bytes( shstrtab ) )