| #!/usr/bin/env python |
| """\ |
| Usage: extract_and_run_command.py [-j N] [command to run] -- [files and/or directories] |
| -j is the number of workers to start, defaulting to 1. |
| [command to run] must be a command that can accept one or many files |
| to process as arguments. |
| |
| WARNING: This script does NOT respond to SIGINT. You must use SIGQUIT or SIGKILL to |
| terminate it early. |
| """ |
| |
| ### The canonical location for this file is |
| ### https://hg.mozilla.org/build/tools/file/default/stage/extract_and_run_command.py |
| ### |
| ### Please update the copy in puppet to deploy new changes to |
| ### stage.mozilla.org, see |
| # https://wiki.mozilla.org/ReleaseEngineering/How_To/Modify_scripts_on_stage |
| |
| import logging |
| import os |
| from os import path |
| import sys |
| from Queue import Queue |
| import shutil |
| import subprocess |
| import tempfile |
| from threading import Thread |
| import time |
| |
# mar.py is expected in ../buildfarm/utils relative to this script, so that
# directory has to be on the import path before "from mar import ..." runs.
sys.path.append(path.join(
    path.dirname(path.realpath(__file__)), "../buildfarm/utils"))

# All output goes to stdout as bare messages (no level/timestamp prefix).
logging.basicConfig(
    format="%(message)s", level=logging.INFO, stream=sys.stdout)
log = logging.getLogger(__name__)

from mar import BZ2MarFile

# Name of the 7-zip executable used to unpack .exe files.
SEVENZIP = "7za"
| |
| |
def extractMar(filename, tempdir):
    """Extract the MAR archive `filename' into the directory `tempdir'
    by way of BZ2MarFile.extractall()."""
    BZ2MarFile(filename).extractall(path=tempdir)
| |
| |
def extractExe(filename, tempdir):
    """Try to unpack `filename' into `tempdir' with 7-zip.

    Not all EXEs are 7-zip files, so the exit status of the extractor is
    deliberately ignored: a file that cannot be unpacked simply leaves
    `tempdir' empty.  An OSError raised by Popen() because the SEVENZIP
    executable could not be started is NOT swallowed and propagates to the
    caller.
    """
    # We don't actually care about output, but we redirect to a tempfile
    # to avoid deadlocking in wait() when stdout=PIPE
    fd = tempfile.TemporaryFile()
    try:
        proc = subprocess.Popen([SEVENZIP, 'x', '-o%s' % tempdir, filename],
                                stdout=fd, stderr=subprocess.STDOUT)
        # wait() reports failure through the return code rather than by
        # raising, so there is nothing to catch here; the code is ignored.
        proc.wait()
    finally:
        # Release the scratch file right away instead of waiting for GC.
        fd.close()
| |
# Maps a file extension, as returned by path.splitext() (i.e. including the
# leading dot), to the callable that unpacks files of that type.  Each
# callable accepts two string arguments: the file to extract and the
# directory to extract it into.
EXTRACTORS = {
    ".exe": extractExe,
    ".mar": extractMar,
}
| |
| |
def find_files(d):
    """Generator producing the absolute path of every file found anywhere
    beneath the directory `d'."""
    for dirpath, _dirnames, filenames in os.walk(d):
        for name in filenames:
            located = path.join(dirpath, name)
            yield path.abspath(located)
| |
| |
# The default mode is octal 755.  It is spelled with int(..., 8) because the
# "0755" literal is a syntax error on Python 3 and "0o755" is one on the
# oldest Python 2 releases; this form parses everywhere.
def rchmod(d, mode=int("755", 8)):
    """chmods everything in `d' to `mode', including `d' itself.

    `d' is changed first and os.walk() then visits directories top-down, so
    each directory has already been given `mode' before the walk descends
    into it.
    """
    os.chmod(d, mode)
    for root, dirs, files in os.walk(d):
        # Directories first, then files, all with the same mode.
        for item in dirs + files:
            os.chmod(path.join(root, item), mode)
| |
| |
def maybe_extract(filename):
    """If an extractor is found for `filename', extracts it to a temporary
    directory and chmods it. The consumer is responsible for removing
    the extracted files, if desired.

    Returns the root of the temporary directory, or None when no extractor
    is registered in EXTRACTORS for the file's extension.  If extraction
    fails, the temporary directory is removed here (the caller never gets
    a handle to it) and the exception is re-raised.
    """
    ext = path.splitext(filename)[1]
    extractor = EXTRACTORS.get(ext)
    if extractor is None:
        return None
    tempdir_root = tempfile.mkdtemp()
    try:
        # Append the full filepath to the tempdir.  The path is made
        # absolute first: a relative name such as "../foo.exe" would
        # otherwise be joined as-is and land outside tempdir_root, where
        # it would be neither chmodded, found, nor cleaned up.
        tempdir = path.join(tempdir_root, path.abspath(filename).lstrip('/'))
        os.makedirs(tempdir)
        extractor(filename, tempdir)
        rchmod(tempdir_root)
    except:
        # Catch-all on purpose: whatever went wrong, don't leak the
        # directory, then let the original error continue upwards.
        shutil.rmtree(tempdir_root, ignore_errors=True)
        raise
    return tempdir_root
| |
| |
def process(item, command):
    """Run `command' on `item' and log the result as one buffered message.

    `command' is a list of argv strings.  `item' is appended to it, followed
    by every file that maybe_extract() unpacked from `item' (if any).  The
    command's stdout is captured in a temporary file and emitted, between a
    START and an END line, in a single log.info() call.

    Raises Exception("returned N") when the command exits with a non-zero
    status; errors from extraction or from starting the command propagate
    unchanged.  Extracted files are always removed before returning.
    """
    def format_time(t):
        return time.strftime("%H:%M:%S", time.localtime(t))
    # Buffer output to avoid interleaving of multiple workers'
    logs = []
    args = [item]
    start = time.time()
    logs.append("START %s: %s" % (format_time(start), item))
    # If the file was extracted, we need to process all of its files, too.
    tempdir = maybe_extract(item)
    # Bound before the try block so the cleanup code below can tell whether
    # the capture file was ever created.
    fd = None
    try:
        fd = tempfile.TemporaryFile()
        if tempdir:
            args.extend(find_files(tempdir))
        proc = subprocess.Popen(command + args, stdout=fd)
        proc.wait()
        if proc.returncode != 0:
            raise Exception("returned %s" % proc.returncode)
    finally:
        try:
            if tempdir:
                shutil.rmtree(tempdir)
        finally:
            # Runs even if removing tempdir failed, so the captured output
            # is still reported.
            if fd is not None:
                try:
                    fd.seek(0)
                    # rstrip() here to avoid an unnecessary newline, if it
                    # exists.
                    logs.append(fd.read().rstrip())
                finally:
                    fd.close()
            end = time.time()
            elapsed = end - start
            logs.append("END %s (%d seconds elapsed): %s\n" % (
                format_time(end), elapsed, item))
            # Now that we've got all of our output, print it. It's important
            # that the logging module is used for this, because "print" is
            # not thread-safe.
            log.info("\n".join(logs))
| |
| |
def worker(command, errors):
    """Thread body: process items from the module-level queue `q' until a
    None sentinel is received.

    `command' is handed to process() for every item.  Any item whose
    processing raises is put on the `errors' queue, and the reason is
    logged so that the failure can be diagnosed from the output.
    """
    # iter(callable, sentinel) keeps calling q.get() and stops as soon as
    # it returns None.
    for item in iter(q.get, None):
        try:
            process(item, command)
        except:
            # Deliberately catches everything: a worker that died here
            # would stop consuming the queue and its share of the work
            # would be silently dropped.
            errors.put(item)
            log.error("FAILED %s: %s", item, sys.exc_info()[1])
| |
if __name__ == '__main__':
    # getopt is used in favour of optparse to enable "--" as a separator
    # between the command and list of files. optparse doesn't allow that.
    from getopt import getopt, GetoptError
    try:
        options, args = getopt(sys.argv[1:], 'j:h', ['help'])
    except GetoptError:
        # Unknown option or missing option argument: report it together
        # with the usage text instead of dying with a traceback.
        log.error("%s\n", sys.exc_info()[1])
        log.error(__doc__)
        sys.exit(1)

    concurrency = 1
    for o, a in options:
        if o == '-j':
            try:
                concurrency = int(a)
            except ValueError:
                # Rejected by the range check below.
                concurrency = 0
        elif o in ('-h', '--help'):
            log.info(__doc__)
            sys.exit(0)

    # With fewer than one worker nothing would ever consume the queue and
    # the script would exit successfully without running the command.
    if concurrency < 1:
        log.error("-j requires a positive integer\n")
        log.error(__doc__)
        sys.exit(1)

    if '--' not in args:
        log.error(__doc__)
        sys.exit(1)

    # Everything before the first "--" is the command, everything after it
    # is the list of files and/or directories to process.
    separator = args.index('--')
    command = args[:separator]
    targets = args[separator + 1:]
    # Both halves are required.  In particular an empty command must be
    # rejected, because each input file would otherwise be executed itself.
    if not command or not targets:
        log.error(__doc__)
        sys.exit(1)

    # NOTE: worker() reads this queue through the module-level name `q'.
    q = Queue()
    errors = Queue()
    threads = []
    for i in range(concurrency):
        t = Thread(target=worker, args=(command, errors))
        t.start()
        threads.append(t)

    # find_files is a generator, so work will begin prior to it finding
    # all of the files
    for arg in targets:
        if path.isfile(arg):
            q.put(arg)
        else:
            for f in find_files(arg):
                q.put(f)
    # Because the workers are started before we start populating the q
    # they can't use .empty() to determine whether or not they're done.
    # We also can't use q.join() or q.task_done(), because we need to
    # support Python 2.4. We know that find_files won't yield None,
    # so we can detect doneness by having workers die when they get None
    # as an item.
    for i in range(concurrency):
        q.put(None)

    for t in threads:
        t.join()

    if not errors.empty():
        log.error("Command failed for the following files:")
        while not errors.empty():
            log.error(" %s" % errors.get())
        sys.exit(1)