1#!/usr/bin/python
2# Copyright (c) 2012 The Native Client Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6# Tool for reading archive (.a) files
7# For information about the archive file format, see:
8#   http://en.wikipedia.org/wiki/Ar_(Unix)
9
10import driver_log
11import elftools
12import pathtools
13
14# See above link to wiki entry on archive format.
15AR_MAGIC = '!<arch>\n'
16# Thin archives are like normal archives except that there are only
17# indirect references to each member (the data is not embedded).
18# See manpage for a description of this.
19THIN_MAGIC = '!<thin>\n'
20
21# filetype.IsArchive calls this IsArchive. Top-level tools should prefer
22# filetype.IsArchive, both for consistency (i.e., all checks for file type come
23# from that library), and because its results are cached.
def IsArchive(filename):
  """Return True if |filename| begins with an archive magic string.

  Only the first len(AR_MAGIC) bytes of the file are read and compared
  against AR_MAGIC and THIN_MAGIC; the rest of the file is not inspected.
  """
  fp = driver_log.DriverOpen(filename, "rb")
  try:
    magic = fp.read(len(AR_MAGIC))
  finally:
    # Release the handle even when the read itself fails.
    fp.close()
  return magic in [AR_MAGIC, THIN_MAGIC]
29
30
def GetMemberFilename(member, strtab_data):
  """ Resolve the filename recorded for an archive member.

  member must provide |is_long_name| and |name|. strtab_data is the content
  of the archive's string table; it is consulted only for long names.
  """
  if member.is_long_name:
    # A GNU style long name is stored as /[index], where index is an offset
    # into strtab_data. Keep only the digit characters of the name field.
    digits = [ch for ch in member.name if ch.isdigit()]
    offset = int(''.join(digits))
    # The entry runs from the offset up to the first newline and is expected
    # to carry a trailing '/', which is not part of the filename.
    entry = strtab_data[offset:].partition('\n')[0]
    assert (entry.endswith('/'))
    return entry[:-1]

  # Short names only need surrounding whitespace removed.
  return member.name.strip()
45
46
def GetThinArchiveData(archive_filename, member, strtab_data):
  """Read the data of a thin-archive member from its own file.

  The member's filename (resolved with GetMemberFilename) is interpreted
  relative to the directory containing |archive_filename|. That file is
  opened and at most member.size bytes are read from it.

  Returns the bytes read.
  """
  # Get member's filename (relative to the archive) and open the member
  # ourselves to check the data.
  member_filename = GetMemberFilename(member, strtab_data)
  member_filename = pathtools.join(
      pathtools.dirname(pathtools.abspath(archive_filename)),
      member_filename)
  member_fp = driver_log.DriverOpen(member_filename, 'rb')
  try:
    return member_fp.read(member.size)
  finally:
    # Release the handle even when the read itself fails.
    member_fp.close()
58
59
def GetArchiveType(filename):
  """Determine what kind of objects the archive |filename| contains.

  Members are scanned in order until a regular file reveals the type:
    - data starting with 'BC' yields 'archive-bc';
    - data for which elftools.DecodeELFHeader returns a header yields
      'archive-<arch>' using that header's |arch|.
  An archive with no members at all is reported as 'archive-bc'.

  Malformed member headers, or an archive whose type cannot be determined,
  are reported through driver_log.Log.Fatal.
  """
  fp = driver_log.DriverOpen(filename, "rb")
  try:
    # Read the archive magic header
    magic = fp.read(len(AR_MAGIC))
    assert(magic in [AR_MAGIC, THIN_MAGIC])
    is_thin = (magic == THIN_MAGIC)

    # Find a regular file or symbol table
    empty_file = True
    found_type = ''
    strtab_data = ''
    while not found_type:
      member = MemberHeader(fp)
      if member.error == 'EOF':
        break
      elif member.error:
        driver_log.Log.Fatal("%s: %s", filename, member.error)

      empty_file = False

      if member.is_regular_file and is_thin:
        # For thin archives, do not read the data section.
        # We instead must get at the member indirectly.
        data = GetThinArchiveData(filename, member, strtab_data)
      else:
        data = fp.read(member.size)
        # Member data is aligned to an even offset: an odd-sized member is
        # followed by one padding byte that is not counted in its size.
        # Skip it so the next header is read from the right position.
        if member.size % 2 == 1:
          fp.read(1)

      if member.is_regular_file:
        if data.startswith('BC'):
          found_type = 'archive-bc'
        else:
          elf_header = elftools.DecodeELFHeader(data, filename)
          if elf_header:
            found_type = 'archive-%s' % elf_header.arch
      elif member.is_strtab:
        # We need the strtab data to get long filenames.
        strtab_data = data
      # Other symbol tables carry nothing we need; their data was consumed
      # above just to move on to the next member.

    if empty_file:
      # Empty archives are treated as bitcode ones.
      found_type = 'archive-bc'
    elif not found_type:
      driver_log.Log.Fatal("%s: Unable to determine archive type", filename)
  finally:
    # Close the archive on every exit path, including fatal errors.
    fp.close()

  return found_type
112
113
class MemberHeader(object):
  """Parsed 60-byte header of a single archive member.

  Constructing an instance reads one header from |fp|, leaving |fp|
  positioned just after it. Problems are reported through |error| instead
  of exceptions:
    ''     the header was parsed successfully
    'EOF'  no bytes were left to read
    other  a description of what is wrong with the header

  Attributes:
    name: the 16-byte name field; for regular files with a short name the
        padding and the trailing '/' are removed.
    size: the size field as an integer.
    fmag: the 2-byte header terminator.
    is_svr4_symtab, is_llvm_symtab, is_bsd4_symtab, is_strtab: True when
        the name field matches the corresponding special member name.
    is_regular_file: True when the member is none of the special members.
    is_long_name: True for a regular file whose name field starts with '/'
        (GNU style long name).
  """

  def __init__(self, fp):
    self.error = ''
    # Give every attribute a value up front so the object is fully formed
    # even when parsing stops early with an error.
    self.name = ''
    self.size = 0
    self.fmag = ''
    self.is_svr4_symtab = False
    self.is_llvm_symtab = False
    self.is_bsd4_symtab = False
    self.is_strtab = False
    self.is_regular_file = False
    self.is_long_name = False

    header = fp.read(60)
    if len(header) == 0:
      self.error = "EOF"
      return

    if len(header) != 60:
      self.error = 'Short count reading archive member header'
      return

    self.name = header[0:16]
    size_field = header[48:48 + 10]
    self.fmag = header[58:60]

    if self.fmag != '`\n':
      self.error = 'Invalid archive member header magic string %s' % header
      return

    try:
      self.size = int(size_field)
    except ValueError:
      # Report a corrupt size the same way as any other malformed header
      # rather than letting the conversion error escape.
      self.error = 'Invalid archive member size field %r' % size_field
      return

    self.is_svr4_symtab = (self.name == '/               ')
    self.is_llvm_symtab = (self.name == '#_LLVM_SYM_TAB_#')
    self.is_bsd4_symtab = (self.name == '__.SYMDEF SORTED')
    self.is_strtab      = (self.name == '//              ')
    self.is_regular_file = not (self.is_svr4_symtab or
                                self.is_llvm_symtab or
                                self.is_bsd4_symtab or
                                self.is_strtab)

    # BSD style long names (not supported)
    if self.name.startswith('#1/'):
      self.error = "BSD-style long file names not supported"
      return

    # If it's a GNU long filename, note this.  We use this for thin archives.
    self.is_long_name = (self.is_regular_file and self.name.startswith('/'))

    if self.is_regular_file and not self.is_long_name:
      # Filenames end with '/' and are padded with spaces up to 16 bytes
      self.name = self.name.strip()[:-1]
156