#!/usr/bin/env python
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Wraps ml.exe or ml64.exe and postprocesses the output to be deterministic.
Sets timestamp in .obj file to 0, hence incompatible with link.exe /incremental.

Use by prefixing the ml(64).exe invocation with this script:
    python ml.py ml.exe [args...]"""
12import array
13import collections
14import struct
15import subprocess
16import sys
class Struct(object):
  """A thin wrapper around the struct module that returns a namedtuple.

  Fields are packed little-endian with standard sizes and no alignment
  padding (the format string is prefixed with '<').
  """

  def __init__(self, name, *args):
    """Pass the name of the return type, and then an interleaved list of
    format strings as used by the struct module and of field names.

    Raises ValueError if the arguments are not (format, field name) pairs,
    and struct.error if the combined format string is invalid."""
    # An odd argument count would silently pair formats with the wrong names
    # and only fail much later, inside unpack_from.
    if len(args) % 2 != 0:
      raise ValueError(
          'Struct %r takes interleaved format/field-name pairs, '
          'got %d arguments' % (name, len(args)))
    self.fmt = '<' + ''.join(args[0::2])
    self.type = collections.namedtuple(name, args[1::2])
    # Compile the format once instead of re-parsing it on every call.
    self._compiled = struct.Struct(self.fmt)

  def pack_into(self, buffer, offset, data):
    """Writes the fields of |data|, in order, into the writable |buffer|
    starting at byte |offset|."""
    return self._compiled.pack_into(buffer, offset, *data)

  def unpack_from(self, buffer, offset=0):
    """Reads one record from |buffer| at byte |offset| and returns it as an
    instance of the namedtuple type self.type."""
    return self.type(*self._compiled.unpack_from(buffer, offset))

  def size(self):
    """Returns the number of bytes one packed record occupies."""
    return self._compiled.size
def Subtract(nt, **kwargs):
  """Returns a copy of namedtuple |nt| with the given fields decremented.

  Subtract(nt, f=2) returns a new namedtuple whose f is nt.f - 2; fields that
  are not named keep their value and |nt| itself is left untouched."""
  updated = {}
  for field, amount in kwargs.items():
    updated[field] = getattr(nt, field) - amount
  return nt._replace(**updated)
def MakeDeterministic(objdata):
  """Returns a deterministic version of the COFF object data in |objdata|.

  Takes data produced by ml(64).exe (without any special flags) and
  1. Sets the timestamp to 0
  2. Strips the .debug$S section (which contains an unwanted absolute path),
     together with its section header and its symbol table entry.

  Args:
    objdata: The contents of the .obj file as a byte string.

  Returns:
    The rewritten .obj contents as a byte string.

  Raises:
    AssertionError: If the input violates one of the layout assumptions
        listed below (the checks are plain assert statements).
  """
  # This makes several assumptions about ml's output:
  # - Section data is in the same order as the corresponding section headers:
  #   section headers preceding the .debug$S section header have their data
  #   preceding the .debug$S section data; likewise for section headers
  #   following the .debug$S section.
  # - The .debug$S section contains only the absolute path to the obj file and
  #   nothing else, in particular there's only a single entry in the symbol
  #   table referring to the .debug$S section.
  # - There are no COFF line number entries.
  # - There's no IMAGE_SYM_CLASS_CLR_TOKEN symbol.
  # These seem to hold in practice; if they stop holding this script needs to
  # become smarter.

  objdata = array.array('b', objdata)  # Writable, e.g. via struct.pack_into.

  # Read coff header.
  COFFHEADER = Struct('COFFHEADER',
                      'H', 'Machine',
                      'H', 'NumberOfSections',
                      'I', 'TimeDateStamp',
                      'I', 'PointerToSymbolTable',
                      'I', 'NumberOfSymbols',

                      'H', 'SizeOfOptionalHeader',
                      'H', 'Characteristics')
  coff_header = COFFHEADER.unpack_from(objdata)
  assert coff_header.SizeOfOptionalHeader == 0  # Only set for binaries.

  # Read section headers following coff header.
  SECTIONHEADER = Struct('SECTIONHEADER',
                         '8s', 'Name',
                         'I', 'VirtualSize',
                         'I', 'VirtualAddress',

                         'I', 'SizeOfRawData',
                         'I', 'PointerToRawData',
                         'I', 'PointerToRelocations',
                         'I', 'PointerToLineNumbers',

                         'H', 'NumberOfRelocations',
                         'H', 'NumberOfLineNumbers',
                         'I', 'Characteristics')
  section_headers = []
  debug_section_index = -1
  for i in range(0, coff_header.NumberOfSections):
    section_header = SECTIONHEADER.unpack_from(
        objdata, offset=COFFHEADER.size() + i * SECTIONHEADER.size())
    assert not section_header.Name.startswith(b'/')  # Support short names only.
    section_headers.append(section_header)

    if section_header.Name == b'.debug$S':
      assert debug_section_index == -1
      debug_section_index = i
  assert debug_section_index != -1

  data_start = COFFHEADER.size() + len(section_headers) * SECTIONHEADER.size()

  # Verify the .debug$S section looks like we expect.
  assert section_headers[debug_section_index].Name == b'.debug$S'
  assert section_headers[debug_section_index].VirtualSize == 0
  assert section_headers[debug_section_index].VirtualAddress == 0
  debug_size = section_headers[debug_section_index].SizeOfRawData
  debug_offset = section_headers[debug_section_index].PointerToRawData
  assert section_headers[debug_section_index].PointerToRelocations == 0
  assert section_headers[debug_section_index].PointerToLineNumbers == 0
  assert section_headers[debug_section_index].NumberOfRelocations == 0
  assert section_headers[debug_section_index].NumberOfLineNumbers == 0

  # Make sure sections in front of .debug$S have their data preceding it.
  for header in section_headers[:debug_section_index]:
    assert header.PointerToRawData < debug_offset
    assert header.PointerToRelocations < debug_offset
    assert header.PointerToLineNumbers < debug_offset

  # Make sure sections after .debug$S have their data following it, and that
  # they carry no relocations or line numbers (the relocation fix-up below
  # only visits the sections in front of .debug$S).
  for header in section_headers[debug_section_index + 1:]:
    assert header.PointerToRawData > debug_offset
    assert header.PointerToRelocations == 0
    assert header.PointerToLineNumbers == 0

  # Make sure the first non-empty section's data starts right after the section
  # headers.
  for section_header in section_headers:
    if section_header.PointerToRawData == 0:
      assert section_header.PointerToRelocations == 0
      assert section_header.PointerToLineNumbers == 0
      continue
    assert section_header.PointerToRawData == data_start
    break

  # Make sure the symbol table (and hence, string table) appear after the last
  # section:
  assert (coff_header.PointerToSymbolTable >=
      section_headers[-1].PointerToRawData + section_headers[-1].SizeOfRawData)

  # The symbol table contains a symbol for the no-longer-present .debug$S
  # section. If we leave it there, lld-link will complain:
  #
  #    lld-link: error: .debug$S should not refer to non-existent section 5
  #
  # so we need to remove that symbol table entry as well. This shifts symbol
  # entries around and we need to update symbol table indices in:
  # - relocations
  # - line number records (never present)
  # - one aux symbol entry (IMAGE_SYM_CLASS_CLR_TOKEN; not present in ml output)
  SYM = Struct('SYM',
               '8s', 'Name',
               'I', 'Value',
               'h', 'SectionNumber',  # Note: Signed!
               'H', 'Type',

               'B', 'StorageClass',
               'B', 'NumberOfAuxSymbols')
  i = 0
  debug_sym = -1
  while i < coff_header.NumberOfSymbols:
    sym_offset = coff_header.PointerToSymbolTable + i * SYM.size()
    sym = SYM.unpack_from(objdata, sym_offset)

    # 107 is IMAGE_SYM_CLASS_CLR_TOKEN, which has aux entry "CLR Token
    # Definition", which contains a symbol index. Check it's never present.
    assert sym.StorageClass != 107

    # Note: sym.SectionNumber is 1-based, debug_section_index is 0-based.
    if sym.SectionNumber - 1 == debug_section_index:
      assert debug_sym == -1, 'more than one .debug$S symbol found'
      debug_sym = i
      # Make sure the .debug$S symbol looks like we expect.
      # In particular, it should have exactly one aux symbol.
      assert sym.Name == b'.debug$S'
      assert sym.Value == 0
      assert sym.Type == 0
      assert sym.StorageClass == 3
      assert sym.NumberOfAuxSymbols == 1
    elif sym.SectionNumber > debug_section_index:
      # The symbol lives in a section after .debug$S (the .debug$S case itself
      # was handled above), so its 1-based section number moves down by one.
      sym = Subtract(sym, SectionNumber=1)
      SYM.pack_into(objdata, sym_offset, sym)
    # Aux entries occupy symbol table slots too; skip over them.
    i += 1 + sym.NumberOfAuxSymbols
  assert debug_sym != -1, '.debug$S symbol not found'

  # Note: Usually the .debug$S section is the last, but for files saying
  # `includelib foo.lib`, like safe_terminate_process.asm in 32-bit builds,
  # this isn't true: .drectve is after .debug$S.

  # Update symbol table indices in relocations.
  # There are a few processor types that have one or two relocation types
  # where SymbolTableIndex has a different meaning, but not for x86.
  REL = Struct('REL',
               'I', 'VirtualAddress',
               'I', 'SymbolTableIndex',
               'H', 'Type')
  for header in section_headers[0:debug_section_index]:
    for j in range(0, header.NumberOfRelocations):
      rel_offset = header.PointerToRelocations + j * REL.size()
      rel = REL.unpack_from(objdata, rel_offset)
      assert rel.SymbolTableIndex != debug_sym
      if rel.SymbolTableIndex > debug_sym:
        # Two slots go away: the .debug$S symbol and its single aux entry.
        rel = Subtract(rel, SymbolTableIndex=2)
        REL.pack_into(objdata, rel_offset, rel)

  # Update symbol table indices in line numbers -- just check they don't exist.
  for header in section_headers:
    assert header.NumberOfLineNumbers == 0

  # Now that all indices are updated, remove the symbol table entry referring to
  # .debug$S and its aux entry.
  # In the layout checked above, the three deletions in this function run from
  # the highest file offset to the lowest (symbol entries, then section data,
  # then section header), so the offsets read earlier stay valid.
  del objdata[coff_header.PointerToSymbolTable + debug_sym * SYM.size():
              coff_header.PointerToSymbolTable + (debug_sym + 2) * SYM.size()]

  # Now we know that it's safe to write out the input data, with just the
  # timestamp overwritten to 0, the .debug$S section header cut out and the
  # .debug$S section's data cut out. File offsets stored in the section
  # headers in front of .debug$S are decremented by the size of one section
  # header; those in the section headers after it, and the symbol table
  # offset, are decremented by one section header plus the size of the
  # removed section data.
  # (The COFF spec only requires on-disk sections to be aligned in image files,
  # for obj files it's not required. If that wasn't the case, deleting slices
  # of data would not generally be safe.)

  # Update section offsets and remove .debug$S section data.
  for i in range(0, debug_section_index):
    header = section_headers[i]
    if header.SizeOfRawData:
      header = Subtract(header, PointerToRawData=SECTIONHEADER.size())
    if header.NumberOfRelocations:
      header = Subtract(header, PointerToRelocations=SECTIONHEADER.size())
    if header.NumberOfLineNumbers:
      header = Subtract(header, PointerToLineNumbers=SECTIONHEADER.size())
    SECTIONHEADER.pack_into(
        objdata, COFFHEADER.size() + i * SECTIONHEADER.size(), header)
  for i in range(debug_section_index + 1, len(section_headers)):
    header = section_headers[i]
    shift = SECTIONHEADER.size() + debug_size
    if header.SizeOfRawData:
      header = Subtract(header, PointerToRawData=shift)
    if header.NumberOfRelocations:
      header = Subtract(header, PointerToRelocations=shift)
    if header.NumberOfLineNumbers:
      header = Subtract(header, PointerToLineNumbers=shift)
    SECTIONHEADER.pack_into(
        objdata, COFFHEADER.size() + i * SECTIONHEADER.size(), header)

  del objdata[debug_offset:debug_offset + debug_size]

  # Finally, remove .debug$S section header and update coff header.
  coff_header = coff_header._replace(TimeDateStamp=0)
  coff_header = Subtract(coff_header,
                         NumberOfSections=1,
                         PointerToSymbolTable=SECTIONHEADER.size() + debug_size,
                         NumberOfSymbols=2)
  COFFHEADER.pack_into(objdata, 0, coff_header)

  del objdata[
      COFFHEADER.size() + debug_section_index * SECTIONHEADER.size():
      COFFHEADER.size() + (debug_section_index + 1) * SECTIONHEADER.size()]

  # All done!
  # array.tostring() is the Python 2 spelling of tobytes().
  if sys.version_info.major == 2:
    return objdata.tostring()
  else:
    return objdata.tobytes()
def main():
  """Runs the wrapped ml(64).exe command, then rewrites its .obj output.

  The command line to run is everything after this script's own name. If the
  command fails, its exit code is returned and nothing else happens.
  Otherwise the object file named by the /Fo<path> argument is read, passed
  through MakeDeterministic() and written back in place; the function then
  returns None."""
  exit_code = subprocess.call(sys.argv[1:])
  if exit_code != 0:
    return exit_code

  # If /Fo is given more than once, the last occurrence decides.
  fo_flag = '/Fo'
  outputs = [arg[len(fo_flag):] for arg in sys.argv[1:]
             if arg.startswith(fo_flag)]
  objfile = outputs[-1] if outputs else None
  assert objfile, 'failed to find ml output'

  with open(objfile, 'rb') as obj_in:
    original = obj_in.read()
  # Only reopen the file for writing once the rewrite has succeeded.
  rewritten = MakeDeterministic(original)
  with open(objfile, 'wb') as obj_out:
    obj_out.write(rewritten)
# Script entry point: main()'s return value becomes the process exit status
# (None, returned on success, exits with status 0).
if __name__ == '__main__':
  exit_status = main()
  sys.exit(exit_status)