| #!/usr/bin/python |
| """ A small program to compute checksums of LLVM checkout. |
| """ |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| |
| import hashlib |
| import logging |
| import re |
| import sys |
| from argparse import ArgumentParser |
| from project_tree import * |
| |
# Matches an *expanded* svn keyword such as "$Date: ... $" or
# "$LastChangedDate: ... $". Group 1 captures the keyword name. At least one
# character is required between the keyword and the closing "$", so a bare
# "$Date$" does not match.
SVN_DATES_REGEX = re.compile(
    r"\$(Date|LastChangedDate)[^\$]+\$")
| |
| |
def main():
    """Command-line entry point: print or verify checksums of an LLVM checkout.

    Without --check, the computed checksums are written to stdout and the
    process exits with status 0. With --check, the computed checksums are
    compared against the ones read from the reference file; on a mismatch both
    sets are printed and the process exits with status 1.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="enable debug logging")
    parser.add_argument(
        "-c", "--check",
        metavar="reference_file",
        help="read checksums from reference_file and "
             "check they match checksums of llvm_path.")
    parser.add_argument(
        "--partial",
        action="store_true",
        help="ignore projects from reference_file "
             "that are not checked out in llvm_path.")
    parser.add_argument(
        "--multi_dir",
        action="store_true",
        help="indicates llvm_path contains llvm, checked out "
             "into multiple directories, as opposed to a "
             "typical single source tree checkout.")
    parser.add_argument("llvm_path")
    options = parser.parse_args()

    # Load the reference checksums (if requested) before doing any hashing.
    reference_checksums = None
    if options.check is not None:
        with open(options.check, "r") as reference_file:
            reference_checksums = ReadLLVMChecksums(reference_file)

    if options.verbose:
        logging.basicConfig(level=logging.DEBUG)

    single_tree_checkout = not options.multi_dir
    llvm_projects = CreateLLVMProjects(single_tree_checkout)
    checksums = ComputeLLVMChecksums(options.llvm_path, llvm_projects)

    # No reference file: just emit what was computed.
    if reference_checksums is None:
        WriteLLVMChecksums(checksums, sys.stdout)
        sys.exit(0)

    if ValidateChecksums(reference_checksums, checksums, options.partial):
        sys.stdout.write("Checksums match.")
        return

    # Mismatch: show both sets so the difference can be inspected.
    sys.stdout.write("Checksums differ.\nNew checksums:\n")
    WriteLLVMChecksums(checksums, sys.stdout)
    sys.stdout.write("Reference checksums:\n")
    WriteLLVMChecksums(reference_checksums, sys.stdout)
    sys.exit(1)
| |
| |
def _to_bytes(value):
    """Return value as bytes, encoding text as UTF-8.

    hashlib and bytes regular expressions only accept bytes on Python 3. On
    Python 2 a plain str already is bytes and is returned unchanged, so the
    digests produced there are not affected.
    """
    if isinstance(value, bytes):
        return value
    return value.encode("utf-8")


def ComputeLLVMChecksums(root_path, projects):
    """Compute checksums for LLVM sources checked out using svn.

    Each file is hashed with SHA-256 after svn date keyword expansions have
    been collapsed; a dangling symlink is hashed by its link target instead of
    its contents. A project checksum is the SHA-256 over the path-sorted
    sequence of (path relative to the project root, file digest) pairs.
    Projects whose folder does not exist under root_path are skipped.

    Args:
      root_path: a directory of llvm checkout.
      projects: a list of LLVMProject instances, which describe checkout paths,
        relative to root_path.

    Returns:
      A dict mapping from project name to project checksum.
    """
    hash_algo = hashlib.sha256

    def collapse_svn_substitutions(contents):
        # Replace svn substitutions for $Date$ and $LastChangedDate$.
        # Unfortunately, these are locale-specific.
        #
        # File contents are bytes, so the pattern has to be a bytes pattern as
        # well (a text pattern raises TypeError on Python 3). re caches
        # compiled patterns, so deriving it per call is cheap.
        svn_dates_regex = re.compile(_to_bytes(SVN_DATES_REGEX.pattern))
        # NOTE: the replacement has always been the three bytes "$", 0x01, "$":
        # the historical non-raw "$\1$" literal is an octal escape, not a
        # backreference to the keyword name. It is kept byte-for-byte so that
        # previously recorded reference checksums remain valid.
        return svn_dates_regex.sub(b"$\x01$", contents)

    def read_and_collapse_svn_subsitutions(file_path):
        with open(file_path, "rb") as f:
            contents = f.read()
        new_contents = collapse_svn_substitutions(contents)
        if contents != new_contents:
            logging.debug("Replaced svn keyword substitutions in %s", file_path)
            logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents)
        return new_contents

    project_checksums = dict()
    # Hash each project.
    for proj in projects:
        project_root = os.path.join(root_path, proj.relpath)
        if not os.path.exists(project_root):
            logging.info("Folder %s doesn't exist, skipping project %s",
                         proj.relpath, proj.name)
            continue

        files = list()

        def add_file_hash(file_path):
            # A dangling symlink has no contents to read; hash its target.
            if os.path.islink(file_path) and not os.path.exists(file_path):
                content = _to_bytes(os.readlink(file_path))
            else:
                content = read_and_collapse_svn_subsitutions(file_path)
            hasher = hash_algo()
            hasher.update(content)
            file_digest = hasher.hexdigest()
            logging.debug("Checksum %s for file %s", file_digest, file_path)
            files.append((file_path, file_digest))

        logging.info("Computing checksum for %s", proj.name)
        # WalkProjectFiles is defined outside this file; presumably it calls
        # add_file_hash once per file belonging to proj -- verify there.
        WalkProjectFiles(root_path, projects, proj, add_file_hash)

        # Compute final checksum. Sorting makes it independent of walk order.
        files.sort(key=lambda x: x[0])
        hasher = hash_algo()
        for file_path, file_digest in files:
            # Paths are hashed relative to the project root so the checksum
            # does not depend on where the checkout lives.
            file_path = os.path.relpath(file_path, project_root)
            hasher.update(_to_bytes(file_path))
            hasher.update(_to_bytes(file_digest))
        project_checksums[proj.name] = hasher.hexdigest()
    return project_checksums
| |
| |
def WriteLLVMChecksums(checksums, f):
    """Write checksums as text, one "<checksum> <project>" line per project.

    Lines are emitted in ascending order of project name.

    Args:
      checksums: a dict mapping from project name to project checksum (result
        of ComputeLLVMChecksums).
      f: a file object to write into.
    """
    # Project names are unique dict keys, so sorting the (name, digest) pairs
    # orders the output by name alone.
    for name, digest in sorted(checksums.items()):
        f.write("{} {}\n".format(digest, name))
| |
| |
def ReadLLVMChecksums(f):
    """Reads checksums from a text file, produced by WriteLLVMChecksums.

    Each non-blank line must hold exactly two whitespace-separated fields:
    the checksum followed by the project name. Blank lines (for example a
    trailing empty line) are ignored.

    Args:
      f: a file object, opened for reading text, to read the checksums from.

    Returns:
      A dict, mapping from project name to project checksum.

    Raises:
      ValueError: if a non-blank line does not consist of exactly two fields.
    """
    checksums = {}
    for line in f:
        fields = line.split()
        if not fields:
            # Whitespace-only line: nothing to parse.
            continue
        checksum, proj = fields
        checksums[proj] = checksum
    return checksums
| |
| |
def ValidateChecksums(reference_checksums,
                      new_checksums,
                      allow_missing_projects=False):
    """Validates that reference_checksums and new_checksums match.

    Args:
      reference_checksums: a dict of reference checksums, mapping from a
        project name to a project checksum.
      new_checksums: a dict of checksums to be checked, mapping from a project
        name to a project checksum.
      allow_missing_projects:
        When True, reference_checksums may contain more projects than
          new_checksums. Projects missing from new_checksums are ignored.
        When False, new_checksums and reference_checksums must contain
          checksums for the same set of projects. If there is a project in
          reference_checksums, missing from new_checksums, ValidateChecksums
          will return False.

    Returns:
      True, if checksums match with regards to allow_missing_projects flag
      value. False, otherwise.
    """
    # Equal sizes plus "every new project is in the reference" (checked below)
    # together imply that both dicts cover the same set of projects.
    if not allow_missing_projects:
        if len(new_checksums) != len(reference_checksums):
            return False

    # dict.items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for proj, checksum in new_checksums.items():
        # We never computed a checksum for this project.
        if proj not in reference_checksums:
            return False
        # Checksum did not match.
        if reference_checksums[proj] != checksum:
            return False

    return True
| |
| |
# Run the command-line tool only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()