#!/usr/bin/python
# Copyright (c) 2012 The Native Client Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# Tool for reading archive (.a) files
# For information about the archive file format, see:
#   http://en.wikipedia.org/wiki/Ar_(Unix)

import driver_log
import elftools
import pathtools

# See above link to wiki entry on archive format.
AR_MAGIC = '!<arch>\n'
# Thin archives are like normal archives except that there are only
# indirect references to each member (the data is not embedded).
# See the ar manpage for a description of this.
THIN_MAGIC = '!<thin>\n'

# Every member header is a fixed-size record; the name occupies its first
# 16 bytes and is padded on the right with spaces.
_HEADER_SIZE = 60
_NAME_FIELD_SIZE = 16

# Names of the special (non-file) members, padded to the full width of the
# name field so that they can be compared against the raw field directly.
_SVR4_SYMTAB_NAME = '/'.ljust(_NAME_FIELD_SIZE)
_LLVM_SYMTAB_NAME = '#_LLVM_SYM_TAB_#'
_BSD4_SYMTAB_NAME = '__.SYMDEF SORTED'
_STRTAB_NAME = '//'.ljust(_NAME_FIELD_SIZE)


# filetype.IsArchive calls this IsArchive. Top-level tools should prefer
# filetype.IsArchive, both for consistency (i.e., all checks for file type come
# from that library), and because its results are cached.
def IsArchive(filename):
  """ Return True if |filename| starts with a normal or thin archive magic. """
  fp = driver_log.DriverOpen(filename, "rb")
  try:
    magic = fp.read(len(AR_MAGIC))
  finally:
    # Close the file even if the read fails.
    fp.close()
  return magic in [AR_MAGIC, THIN_MAGIC]


def GetMemberFilename(member, strtab_data):
  """ Get the real filename of the archive member.

  member is a MemberHeader. strtab_data is the contents of the archive's
  string table member; it is only consulted for long names.
  """
  if not member.is_long_name:
    return member.name.strip()

  # GNU style long filenames are /[index]
  # where index is a position within the strtab_data.
  # Filter out non-digits
  digits = ''.join([c for c in member.name if c.isdigit()])
  name_index = int(digits)
  # The entry runs from name_index up to the next newline and ends with '/'.
  name_data = strtab_data[name_index:].split('\n', 1)[0]
  assert name_data.endswith('/')
  return name_data[:-1]


def GetThinArchiveData(archive_filename, member, strtab_data):
  """ Read the contents of a thin archive member from its own file.

  The member's filename is resolved relative to the directory containing
  the archive, and at most member.size bytes are read from it.
  """
  # Get member's filename (relative to the archive) and open the member
  # ourselves to check the data.
  member_filename = GetMemberFilename(member, strtab_data)
  member_filename = pathtools.join(
      pathtools.dirname(pathtools.abspath(archive_filename)),
      member_filename)
  member_fp = driver_log.DriverOpen(member_filename, 'rb')
  try:
    return member_fp.read(member.size)
  finally:
    member_fp.close()


def _ReadEmbeddedData(fp, member):
  """ Read a member's embedded data and leave fp at the next member header.

  Members start on even offsets, so an odd-sized member is followed by one
  padding byte that is not counted in member.size; skip it here so that the
  next header read is aligned.
  """
  data = fp.read(member.size)
  if member.size % 2:
    fp.read(1)
  return data


def GetArchiveType(filename):
  """ Determine the type of the archive |filename|.

  Returns 'archive-bc' when the first recognizable regular member starts
  with 'BC' or when the archive has no members at all, and
  'archive-<arch>' when elftools decodes an ELF header from a member.
  Reports a fatal error through driver_log.Log.Fatal if a member header is
  malformed or no member yields a type.
  """
  fp = driver_log.DriverOpen(filename, "rb")
  try:
    # Read the archive magic header
    magic = fp.read(len(AR_MAGIC))
    assert magic in [AR_MAGIC, THIN_MAGIC]
    is_thin = (magic == THIN_MAGIC)

    # Find a regular file or symbol table
    empty_file = True
    found_type = ''
    strtab_data = ''
    while not found_type:
      member = MemberHeader(fp)
      if member.error == 'EOF':
        break
      elif member.error:
        driver_log.Log.Fatal("%s: %s", filename, member.error)

      empty_file = False

      if member.is_strtab:
        # We need the strtab data to get long filenames.
        strtab_data = _ReadEmbeddedData(fp, member)
        continue

      if not member.is_regular_file:
        # Other symbol tables we can just skip ahead.
        _ReadEmbeddedData(fp, member)
        continue

      if is_thin:
        # For thin archives, do not read the data section.
        # We instead must get at the member indirectly.
        data = GetThinArchiveData(filename, member, strtab_data)
      else:
        data = _ReadEmbeddedData(fp, member)

      if data.startswith('BC'):
        found_type = 'archive-bc'
      else:
        elf_header = elftools.DecodeELFHeader(data, filename)
        if elf_header:
          found_type = 'archive-%s' % elf_header.arch
  finally:
    # Release the handle on every exit path, including fatal errors.
    fp.close()

  if empty_file:
    # Empty archives are treated as bitcode ones.
    found_type = 'archive-bc'
  elif not found_type:
    driver_log.Log.Fatal("%s: Unable to determine archive type", filename)

  return found_type


class MemberHeader(object):
  """ A single archive member header, parsed from the current position of fp.

  After construction, check |error| first: it is 'EOF' when no bytes were
  left to read, another non-empty message when the header is malformed, and
  '' on success. On success |name|, |size| and the is_* flags describe the
  member.
  """

  def __init__(self, fp):
    self.error = ''
    # Defaults, so that every attribute exists even when parsing stops early.
    self.name = ''
    self.size = 0
    self.fmag = ''
    self.is_svr4_symtab = False
    self.is_llvm_symtab = False
    self.is_bsd4_symtab = False
    self.is_strtab = False
    self.is_regular_file = False
    self.is_long_name = False

    header = fp.read(_HEADER_SIZE)
    if len(header) == 0:
      self.error = "EOF"
      return

    if len(header) != _HEADER_SIZE:
      self.error = 'Short count reading archive member header'
      return

    self.name = header[0:_NAME_FIELD_SIZE]
    size_field = header[48:48 + 10]
    self.fmag = header[58:60]

    if self.fmag != '`\n':
      self.error = 'Invalid archive member header magic string %s' % header
      return

    try:
      self.size = int(size_field)
    except ValueError:
      # Report a corrupt size the same way as other header problems instead
      # of letting the exception escape.
      self.error = 'Invalid archive member size field %r' % size_field
      return

    # The name field is always 16 bytes wide, so the special names must be
    # compared in their space-padded form.
    self.is_svr4_symtab = (self.name == _SVR4_SYMTAB_NAME)
    self.is_llvm_symtab = (self.name == _LLVM_SYMTAB_NAME)
    self.is_bsd4_symtab = (self.name == _BSD4_SYMTAB_NAME)
    self.is_strtab = (self.name == _STRTAB_NAME)
    self.is_regular_file = not (self.is_svr4_symtab or
                                self.is_llvm_symtab or
                                self.is_bsd4_symtab or
                                self.is_strtab)

    # BSD style long names (not supported)
    if self.name.startswith('#1/'):
      self.error = "BSD-style long file names not supported"
      return

    # If it's a GNU long filename, note this. We use this for thin archives.
    self.is_long_name = (self.is_regular_file and self.name.startswith('/'))

    if self.is_regular_file and not self.is_long_name:
      # Filenames end with '/' and are padded with spaces up to 16 bytes
      self.name = self.name.strip()[:-1]