#!/usr/bin/env python
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Wraps ml.exe or ml64.exe and postprocesses the output to be deterministic.
Sets timestamp in .obj file to 0, hence incompatible with link.exe /incremental.

Use by prefixing the ml(64).exe invocation with this script:
    python ml.py ml.exe [args...]"""

import array
import collections
import struct
import subprocess
import sys


class Struct(object):
  """A thin wrapper around the struct module that returns a namedtuple"""
  def __init__(self, name, *args):
    """Pass the name of the return type, and then an interleaved list of
    format strings as used by the struct module and of field names."""
    # Even positions are struct format codes, odd positions are field names.
    # '<' selects little-endian, standard sizes, no padding.
    self.fmt = '<' + ''.join(args[0::2])
    self.type = collections.namedtuple(name, args[1::2])

  def pack_into(self, buffer, offset, data):
    """Writes the fields of `data` into `buffer` starting at `offset`."""
    return struct.pack_into(self.fmt, buffer, offset, *data)

  def unpack_from(self, buffer, offset=0):
    """Reads one record from `buffer` at `offset` and returns a namedtuple."""
    return self.type(*struct.unpack_from(self.fmt, buffer, offset))

  def size(self):
    """Returns the packed size of one record in bytes."""
    return struct.calcsize(self.fmt)


def Subtract(nt, **kwargs):
  """Subtract(nt, f=2) returns a new namedtuple with 2 subtracted from nt.f"""
  return nt._replace(**{k: getattr(nt, k) - v for k, v in kwargs.items()})


def MakeDeterministic(objdata):
  """Returns a copy of COFF object data with timestamp and .debug$S removed.

  Takes data produced by ml(64).exe (without any special flags) and
  1. Sets the timestamp to 0
  2. Strips the .debug$S section (which contains an unwanted absolute path)

  Args:
    objdata: the raw contents of the .obj file, as a byte string.

  Returns:
    The rewritten object file contents as a byte string.

  Raises:
    AssertionError: if the input does not match the layout assumptions
        listed below (including when no .debug$S section is present).
  """
  # This makes several assumptions about ml's output:
  # - Section data is in the same order as the corresponding section headers:
  #   section headers preceding the .debug$S section header have their data
  #   preceding the .debug$S section data; likewise for section headers
  #   following the .debug$S section.
  # - The .debug$S section contains only the absolute path to the obj file and
  #   nothing else, in particular there's only a single entry in the symbol
  #   table referring to the .debug$S section.
  # - There are no COFF line number entries.
  # - There's no IMAGE_SYM_CLASS_CLR_TOKEN symbol.
  # These seem to hold in practice; if they stop holding this script needs to
  # become smarter.

  objdata = array.array('b', objdata)  # Writable, e.g. via struct.pack_into.

  # Read coff header.
  COFFHEADER = Struct('COFFHEADER',
                      'H', 'Machine',
                      'H', 'NumberOfSections',
                      'I', 'TimeDateStamp',
                      'I', 'PointerToSymbolTable',
                      'I', 'NumberOfSymbols',

                      'H', 'SizeOfOptionalHeader',
                      'H', 'Characteristics')
  coff_header = COFFHEADER.unpack_from(objdata)
  assert coff_header.SizeOfOptionalHeader == 0  # Only set for binaries.

  # Read section headers following coff header.
  SECTIONHEADER = Struct('SECTIONHEADER',
                         '8s', 'Name',
                         'I', 'VirtualSize',
                         'I', 'VirtualAddress',

                         'I', 'SizeOfRawData',
                         'I', 'PointerToRawData',
                         'I', 'PointerToRelocations',
                         'I', 'PointerToLineNumbers',

                         'H', 'NumberOfRelocations',
                         'H', 'NumberOfLineNumbers',
                         'I', 'Characteristics')
  section_headers = []
  debug_section_index = -1
  for i in range(coff_header.NumberOfSections):
    section_header = SECTIONHEADER.unpack_from(
        objdata, offset=COFFHEADER.size() + i * SECTIONHEADER.size())
    assert not section_header.Name.startswith(b'/')  # Support short names only.
    section_headers.append(section_header)

    if section_header.Name == b'.debug$S':
      assert debug_section_index == -1
      debug_section_index = i
  assert debug_section_index != -1

  data_start = COFFHEADER.size() + len(section_headers) * SECTIONHEADER.size()

  # Verify the .debug$S section looks like we expect.
  assert section_headers[debug_section_index].Name == b'.debug$S'
  assert section_headers[debug_section_index].VirtualSize == 0
  assert section_headers[debug_section_index].VirtualAddress == 0
  debug_size = section_headers[debug_section_index].SizeOfRawData
  debug_offset = section_headers[debug_section_index].PointerToRawData
  assert section_headers[debug_section_index].PointerToRelocations == 0
  assert section_headers[debug_section_index].PointerToLineNumbers == 0
  assert section_headers[debug_section_index].NumberOfRelocations == 0
  assert section_headers[debug_section_index].NumberOfLineNumbers == 0

  # Make sure sections in front of .debug$S have their data preceding it.
  for header in section_headers[:debug_section_index]:
    assert header.PointerToRawData < debug_offset
    assert header.PointerToRelocations < debug_offset
    assert header.PointerToLineNumbers < debug_offset

  # Make sure sections after .debug$S have their data following it.
  for header in section_headers[debug_section_index + 1:]:
    # Make sure the .debug$S data is at the very end of section data:
    assert header.PointerToRawData > debug_offset
    assert header.PointerToRelocations == 0
    assert header.PointerToLineNumbers == 0

  # Make sure the first non-empty section's data starts right after the section
  # headers.
  for section_header in section_headers:
    if section_header.PointerToRawData == 0:
      assert section_header.PointerToRelocations == 0
      assert section_header.PointerToLineNumbers == 0
      continue
    assert section_header.PointerToRawData == data_start
    break

  # Make sure the symbol table (and hence, string table) appear after the last
  # section:
  assert (coff_header.PointerToSymbolTable >=
      section_headers[-1].PointerToRawData + section_headers[-1].SizeOfRawData)

  # The symbol table contains a symbol for the no-longer-present .debug$S
  # section. If we leave it there, lld-link will complain:
  #
  #    lld-link: error: .debug$S should not refer to non-existent section 5
  #
  # so we need to remove that symbol table entry as well. This shifts symbol
  # entries around and we need to update symbol table indices in:
  # - relocations
  # - line number records (never present)
  # - one aux symbol entry (IMAGE_SYM_CLASS_CLR_TOKEN; not present in ml output)
  SYM = Struct('SYM',
               '8s', 'Name',
               'I', 'Value',
               'h', 'SectionNumber',  # Note: Signed!
               'H', 'Type',

               'B', 'StorageClass',
               'B', 'NumberOfAuxSymbols')
  i = 0
  debug_sym = -1
  while i < coff_header.NumberOfSymbols:
    sym_offset = coff_header.PointerToSymbolTable + i * SYM.size()
    sym = SYM.unpack_from(objdata, sym_offset)

    # 107 is IMAGE_SYM_CLASS_CLR_TOKEN, which has aux entry "CLR Token
    # Definition", which contains a symbol index. Check it's never present.
    assert sym.StorageClass != 107

    # Note: sym.SectionNumber is 1-based, debug_section_index is 0-based.
    if sym.SectionNumber - 1 == debug_section_index:
      assert debug_sym == -1, 'more than one .debug$S symbol found'
      debug_sym = i
      # Make sure the .debug$S symbol looks like we expect.
      # In particular, it should have exactly one aux symbol.
      assert sym.Name == b'.debug$S'
      assert sym.Value == 0
      assert sym.Type == 0
      assert sym.StorageClass == 3
      assert sym.NumberOfAuxSymbols == 1
    elif sym.SectionNumber > debug_section_index:
      # The symbol lives in a section after .debug$S (the equal case was
      # handled above), so its 1-based section number moves down by one.
      sym = Subtract(sym, SectionNumber=1)
      SYM.pack_into(objdata, sym_offset, sym)
    # Aux records occupy symbol table slots too; skip over them.
    i += 1 + sym.NumberOfAuxSymbols
  assert debug_sym != -1, '.debug$S symbol not found'

  # Note: Usually the .debug$S section is the last, but for files saying
  # `includelib foo.lib`, like safe_terminate_process.asm in 32-bit builds,
  # this isn't true: .drectve is after .debug$S.

  # Update symbol table indices in relocations.
  # There are a few processor types that have one or two relocation types
  # where SymbolTableIndex has a different meaning, but not for x86.
  REL = Struct('REL',
               'I', 'VirtualAddress',
               'I', 'SymbolTableIndex',
               'H', 'Type')
  # Only sections in front of .debug$S are visited: the ones after it were
  # asserted above to have PointerToRelocations == 0.
  for header in section_headers[0:debug_section_index]:
    for j in range(header.NumberOfRelocations):
      rel_offset = header.PointerToRelocations + j * REL.size()
      rel = REL.unpack_from(objdata, rel_offset)
      assert rel.SymbolTableIndex != debug_sym
      if rel.SymbolTableIndex > debug_sym:
        # Two slots disappear: the .debug$S symbol and its one aux record.
        rel = Subtract(rel, SymbolTableIndex=2)
        REL.pack_into(objdata, rel_offset, rel)

  # Update symbol table indices in line numbers -- just check they don't exist.
  for header in section_headers:
    assert header.NumberOfLineNumbers == 0

  # Now that all indices are updated, remove the symbol table entry referring to
  # .debug$S and its aux entry.
  # (Deletions below proceed from the highest file offset to the lowest, so
  # the offsets read earlier stay valid for the parts not yet removed.)
  del objdata[coff_header.PointerToSymbolTable + debug_sym * SYM.size():
              coff_header.PointerToSymbolTable + (debug_sym + 2) * SYM.size()]

  # Now we know that it's safe to write out the input data, with just the
  # timestamp overwritten to 0, the .debug$S section header cut out (and the
  # offsets of all other section headers decremented by the size of that
  # one section header), and the .debug$S section's data cut out. The symbol
  # table offset needs to be reduced by one section header and the size of
  # the missing section.
  # (The COFF spec only requires on-disk sections to be aligned in image files,
  # for obj files it's not required. If that wasn't the case, deleting slices
  # of data would not generally be safe.)

  # Update section offsets and remove .debug$S section data.
  # Sections in front of .debug$S only move up by the removed section header.
  for i in range(debug_section_index):
    header = section_headers[i]
    if header.SizeOfRawData:
      header = Subtract(header, PointerToRawData=SECTIONHEADER.size())
    if header.NumberOfRelocations:
      header = Subtract(header, PointerToRelocations=SECTIONHEADER.size())
    if header.NumberOfLineNumbers:
      header = Subtract(header, PointerToLineNumbers=SECTIONHEADER.size())
    SECTIONHEADER.pack_into(
        objdata, COFFHEADER.size() + i * SECTIONHEADER.size(), header)
  # Sections after .debug$S additionally move up by the removed section data.
  for i in range(debug_section_index + 1, len(section_headers)):
    header = section_headers[i]
    shift = SECTIONHEADER.size() + debug_size
    if header.SizeOfRawData:
      header = Subtract(header, PointerToRawData=shift)
    if header.NumberOfRelocations:
      header = Subtract(header, PointerToRelocations=shift)
    if header.NumberOfLineNumbers:
      header = Subtract(header, PointerToLineNumbers=shift)
    SECTIONHEADER.pack_into(
        objdata, COFFHEADER.size() + i * SECTIONHEADER.size(), header)

  del objdata[debug_offset:debug_offset + debug_size]

  # Finally, remove .debug$S section header and update coff header.
  coff_header = coff_header._replace(TimeDateStamp=0)
  coff_header = Subtract(coff_header,
                         NumberOfSections=1,
                         PointerToSymbolTable=SECTIONHEADER.size() + debug_size,
                         NumberOfSymbols=2)
  COFFHEADER.pack_into(objdata, 0, coff_header)

  del objdata[
      COFFHEADER.size() + debug_section_index * SECTIONHEADER.size():
      COFFHEADER.size() + (debug_section_index + 1) * SECTIONHEADER.size()]

  # All done!
  if sys.version_info.major == 2:
    return objdata.tostring()
  else:
    return objdata.tobytes()


def main():
  """Runs the wrapped command, then rewrites the object file it produced.

  The wrapped command line is sys.argv[1:]. The output path is taken from the
  last argument starting with '/Fo'.

  Returns:
    The wrapped command's exit code if it is non-zero; otherwise None, which
    sys.exit() treats as success.
  """
  ml_result = subprocess.call(sys.argv[1:])
  if ml_result != 0:
    return ml_result

  objfile = None
  for arg in sys.argv[1:]:
    if arg.startswith('/Fo'):
      objfile = arg[len('/Fo'):]
  assert objfile, 'failed to find ml output'

  with open(objfile, 'rb') as f:
    objdata = f.read()
  objdata = MakeDeterministic(objdata)
  with open(objfile, 'wb') as f:
    f.write(objdata)


if __name__ == '__main__':
  sys.exit(main())