| #!/usr/bin/env python3 |
| # Copyright 2018 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Wraps ml.exe or ml64.exe and postprocesses the output to be deterministic. |
| Sets timestamp in .obj file to 0, hence incompatible with link.exe /incremental. |
| |
| Use by prefixing the ml(64).exe invocation with this script: |
| python ml.py ml.exe [args...]""" |
| |
| import array |
| import collections |
| import struct |
| import subprocess |
| import sys |
| |
| |
| class Struct(object): |
| """A thin wrapper around the struct module that returns a namedtuple""" |
| |
| def __init__(self, name, *args): |
| """Pass the name of the return type, and then an interleaved list of |
| format strings as used by the struct module and of field names.""" |
| self.fmt = '<' + ''.join(args[0::2]) |
| self.type = collections.namedtuple(name, args[1::2]) |
| |
| def pack_into(self, buffer, offset, data): |
| return struct.pack_into(self.fmt, buffer, offset, *data) |
| |
| def unpack_from(self, buffer, offset=0): |
| return self.type(*struct.unpack_from(self.fmt, buffer, offset)) |
| |
| def size(self): |
| return struct.calcsize(self.fmt) |
| |
| |
| def Subtract(nt, **kwargs): |
| """Subtract(nt, f=2) returns a new namedtuple with 2 subtracted from nt.f""" |
| return nt._replace(**{k: getattr(nt, k) - v for k, v in kwargs.items()}) |
| |
| |
| def MakeDeterministic(objdata): |
| # Takes data produced by ml(64).exe (without any special flags) and |
| # 1. Sets the timestamp to 0 |
| # 2. Strips the .debug$S section (which contains an unwanted absolute path) |
| |
| # This makes several assumptions about ml's output: |
| # - Section data is in the same order as the corresponding section headers: |
| # section headers preceding the .debug$S section header have their data |
| # preceding the .debug$S section data; likewise for section headers |
| # following the .debug$S section. |
| # - The .debug$S section contains only the absolute path to the obj file and |
| # nothing else, in particular there's only a single entry in the symbol |
| # table referring to the .debug$S section. |
| # - There are no COFF line number entries. |
| # - There's no IMAGE_SYM_CLASS_CLR_TOKEN symbol. |
| # These seem to hold in practice; if they stop holding this script needs to |
| # become smarter. |
| |
| objdata = array.array('b', objdata) # Writable, e.g. via struct.pack_into. |
| |
| # Read coff header. |
| COFFHEADER = Struct('COFFHEADER', 'H', 'Machine', 'H', 'NumberOfSections', |
| 'I', 'TimeDateStamp', 'I', 'PointerToSymbolTable', 'I', |
| 'NumberOfSymbols', 'H', 'SizeOfOptionalHeader', 'H', |
| 'Characteristics') |
| coff_header = COFFHEADER.unpack_from(objdata) |
| assert coff_header.SizeOfOptionalHeader == 0 # Only set for binaries. |
| |
| # Read section headers following coff header. |
| SECTIONHEADER = Struct('SECTIONHEADER', '8s', 'Name', 'I', 'VirtualSize', 'I', |
| 'VirtualAddress', 'I', 'SizeOfRawData', 'I', |
| 'PointerToRawData', 'I', 'PointerToRelocations', 'I', |
| 'PointerToLineNumbers', 'H', 'NumberOfRelocations', |
| 'H', 'NumberOfLineNumbers', 'I', 'Characteristics') |
| section_headers = [] |
| debug_section_index = -1 |
| for i in range(0, coff_header.NumberOfSections): |
| section_header = SECTIONHEADER.unpack_from(objdata, |
| offset=COFFHEADER.size() + |
| i * SECTIONHEADER.size()) |
| assert not section_header[0].startswith(b'/') # Support short names only. |
| section_headers.append(section_header) |
| |
| if section_header.Name == b'.debug$S': |
| assert debug_section_index == -1 |
| debug_section_index = i |
| assert debug_section_index != -1 |
| |
| data_start = COFFHEADER.size() + len(section_headers) * SECTIONHEADER.size() |
| |
| # Verify the .debug$S section looks like we expect. |
| assert section_headers[debug_section_index].Name == b'.debug$S' |
| assert section_headers[debug_section_index].VirtualSize == 0 |
| assert section_headers[debug_section_index].VirtualAddress == 0 |
| debug_size = section_headers[debug_section_index].SizeOfRawData |
| debug_offset = section_headers[debug_section_index].PointerToRawData |
| assert section_headers[debug_section_index].PointerToRelocations == 0 |
| assert section_headers[debug_section_index].PointerToLineNumbers == 0 |
| assert section_headers[debug_section_index].NumberOfRelocations == 0 |
| assert section_headers[debug_section_index].NumberOfLineNumbers == 0 |
| |
| # Make sure sections in front of .debug$S have their data preceding it. |
| for header in section_headers[:debug_section_index]: |
| assert header.PointerToRawData < debug_offset |
| assert header.PointerToRelocations < debug_offset |
| assert header.PointerToLineNumbers < debug_offset |
| |
| # Make sure sections after of .debug$S have their data following it. |
| for header in section_headers[debug_section_index + 1:]: |
| # Make sure the .debug$S data is at the very end of section data: |
| assert header.PointerToRawData > debug_offset |
| assert header.PointerToRelocations == 0 |
| assert header.PointerToLineNumbers == 0 |
| |
| # Make sure the first non-empty section's data starts right after the section |
| # headers. |
| for section_header in section_headers: |
| if section_header.PointerToRawData == 0: |
| assert section_header.PointerToRelocations == 0 |
| assert section_header.PointerToLineNumbers == 0 |
| continue |
| assert section_header.PointerToRawData == data_start |
| break |
| |
| # Make sure the symbol table (and hence, string table) appear after the last |
| # section: |
| assert ( |
| coff_header.PointerToSymbolTable >= |
| section_headers[-1].PointerToRawData + section_headers[-1].SizeOfRawData) |
| |
| # The symbol table contains a symbol for the no-longer-present .debug$S |
| # section. If we leave it there, lld-link will complain: |
| # |
| # lld-link: error: .debug$S should not refer to non-existent section 5 |
| # |
| # so we need to remove that symbol table entry as well. This shifts symbol |
| # entries around and we need to update symbol table indices in: |
| # - relocations |
| # - line number records (never present) |
| # - one aux symbol entry (IMAGE_SYM_CLASS_CLR_TOKEN; not present in ml output) |
| SYM = Struct( |
| 'SYM', |
| '8s', |
| 'Name', |
| 'I', |
| 'Value', |
| 'h', |
| 'SectionNumber', # Note: Signed! |
| 'H', |
| 'Type', |
| 'B', |
| 'StorageClass', |
| 'B', |
| 'NumberOfAuxSymbols') |
| i = 0 |
| debug_sym = -1 |
| while i < coff_header.NumberOfSymbols: |
| sym_offset = coff_header.PointerToSymbolTable + i * SYM.size() |
| sym = SYM.unpack_from(objdata, sym_offset) |
| |
| # 107 is IMAGE_SYM_CLASS_CLR_TOKEN, which has aux entry "CLR Token |
| # Definition", which contains a symbol index. Check it's never present. |
| assert sym.StorageClass != 107 |
| |
| # Note: sym.SectionNumber is 1-based, debug_section_index is 0-based. |
| if sym.SectionNumber - 1 == debug_section_index: |
| assert debug_sym == -1, 'more than one .debug$S symbol found' |
| debug_sym = i |
| # Make sure the .debug$S symbol looks like we expect. |
| # In particular, it should have exactly one aux symbol. |
| assert sym.Name == b'.debug$S' |
| assert sym.Value == 0 |
| assert sym.Type == 0 |
| assert sym.StorageClass == 3 |
| assert sym.NumberOfAuxSymbols == 1 |
| elif sym.SectionNumber > debug_section_index: |
| sym = Subtract(sym, SectionNumber=1) |
| SYM.pack_into(objdata, sym_offset, sym) |
| i += 1 + sym.NumberOfAuxSymbols |
| assert debug_sym != -1, '.debug$S symbol not found' |
| |
| # Note: Usually the .debug$S section is the last, but for files saying |
| # `includelib foo.lib`, like safe_terminate_process.asm in 32-bit builds, |
| # this isn't true: .drectve is after .debug$S. |
| |
| # Update symbol table indices in relocations. |
| # There are a few processor types that have one or two relocation types |
| # where SymbolTableIndex has a different meaning, but not for x86. |
| REL = Struct('REL', 'I', 'VirtualAddress', 'I', 'SymbolTableIndex', 'H', |
| 'Type') |
| for header in section_headers[0:debug_section_index]: |
| for j in range(0, header.NumberOfRelocations): |
| rel_offset = header.PointerToRelocations + j * REL.size() |
| rel = REL.unpack_from(objdata, rel_offset) |
| assert rel.SymbolTableIndex != debug_sym |
| if rel.SymbolTableIndex > debug_sym: |
| rel = Subtract(rel, SymbolTableIndex=2) |
| REL.pack_into(objdata, rel_offset, rel) |
| |
| # Update symbol table indices in line numbers -- just check they don't exist. |
| for header in section_headers: |
| assert header.NumberOfLineNumbers == 0 |
| |
| # Now that all indices are updated, remove the symbol table entry referring to |
| # .debug$S and its aux entry. |
| del objdata[coff_header.PointerToSymbolTable + |
| debug_sym * SYM.size():coff_header.PointerToSymbolTable + |
| (debug_sym + 2) * SYM.size()] |
| |
| # Now we know that it's safe to write out the input data, with just the |
| # timestamp overwritten to 0, the last section header cut out (and the |
| # offsets of all other section headers decremented by the size of that |
| # one section header), and the last section's data cut out. The symbol |
| # table offset needs to be reduced by one section header and the size of |
| # the missing section. |
| # (The COFF spec only requires on-disk sections to be aligned in image files, |
| # for obj files it's not required. If that wasn't the case, deleting slices |
| # if data would not generally be safe.) |
| |
| # Update section offsets and remove .debug$S section data. |
| for i in range(0, debug_section_index): |
| header = section_headers[i] |
| if header.SizeOfRawData: |
| header = Subtract(header, PointerToRawData=SECTIONHEADER.size()) |
| if header.NumberOfRelocations: |
| header = Subtract(header, PointerToRelocations=SECTIONHEADER.size()) |
| if header.NumberOfLineNumbers: |
| header = Subtract(header, PointerToLineNumbers=SECTIONHEADER.size()) |
| SECTIONHEADER.pack_into(objdata, |
| COFFHEADER.size() + i * SECTIONHEADER.size(), |
| header) |
| for i in range(debug_section_index + 1, len(section_headers)): |
| header = section_headers[i] |
| shift = SECTIONHEADER.size() + debug_size |
| if header.SizeOfRawData: |
| header = Subtract(header, PointerToRawData=shift) |
| if header.NumberOfRelocations: |
| header = Subtract(header, PointerToRelocations=shift) |
| if header.NumberOfLineNumbers: |
| header = Subtract(header, PointerToLineNumbers=shift) |
| SECTIONHEADER.pack_into(objdata, |
| COFFHEADER.size() + i * SECTIONHEADER.size(), |
| header) |
| |
| del objdata[debug_offset:debug_offset + debug_size] |
| |
| # Finally, remove .debug$S section header and update coff header. |
| coff_header = coff_header._replace(TimeDateStamp=0) |
| coff_header = Subtract(coff_header, |
| NumberOfSections=1, |
| PointerToSymbolTable=SECTIONHEADER.size() + debug_size, |
| NumberOfSymbols=2) |
| COFFHEADER.pack_into(objdata, 0, coff_header) |
| |
| del objdata[COFFHEADER.size() + |
| debug_section_index * SECTIONHEADER.size():COFFHEADER.size() + |
| (debug_section_index + 1) * SECTIONHEADER.size()] |
| |
| # All done! |
| if sys.version_info.major == 2: |
| return objdata.tostring() |
| else: |
| return objdata.tobytes() |
| |
| |
| def main(): |
| ml_result = subprocess.call(sys.argv[1:]) |
| if ml_result != 0: |
| return ml_result |
| |
| objfile = None |
| for i in range(1, len(sys.argv)): |
| if sys.argv[i].startswith('/Fo'): |
| objfile = sys.argv[i][len('/Fo'):] |
| assert objfile, 'failed to find ml output' |
| |
| with open(objfile, 'rb') as f: |
| objdata = f.read() |
| objdata = MakeDeterministic(objdata) |
| with open(objfile, 'wb') as f: |
| f.write(objdata) |
| |
| |
| if __name__ == '__main__': |
| sys.exit(main()) |