| #!/usr/bin/env python |
| # Copyright 2014 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """A git command for managing a local cache of git repositories.""" |
| |
| from __future__ import print_function |
| import errno |
| import logging |
| import optparse |
| import os |
| import tempfile |
| import time |
| import subprocess |
| import sys |
| import urlparse |
| import zipfile |
| |
| from download_from_google_storage import Gsutil |
| import gclient_utils |
| import subcommand |
| |
| try: |
| # pylint: disable=E0602 |
| WinErr = WindowsError |
| except NameError: |
| class WinErr(Exception): |
| pass |
| |
| class LockError(Exception): |
| pass |
| |
| |
| class Lockfile(object): |
| """Class to represent a cross-platform process-specific lockfile.""" |
| |
| def __init__(self, path): |
| self.path = os.path.abspath(path) |
| self.lockfile = self.path + ".lock" |
| self.pid = os.getpid() |
| |
| def _read_pid(self): |
| """Read the pid stored in the lockfile. |
| |
| Note: This method is potentially racy. By the time it returns the lockfile |
| may have been unlocked, removed, or stolen by some other process. |
| """ |
| try: |
| with open(self.lockfile, 'r') as f: |
| pid = int(f.readline().strip()) |
| except (IOError, ValueError): |
| pid = None |
| return pid |
| |
| def _make_lockfile(self): |
| """Safely creates a lockfile containing the current pid.""" |
| open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY) |
| fd = os.open(self.lockfile, open_flags, 0o644) |
| f = os.fdopen(fd, 'w') |
| print(self.pid, file=f) |
| f.close() |
| |
| def _remove_lockfile(self): |
| """Delete the lockfile. Complains (implicitly) if it doesn't exist. |
| |
| See gclient_utils.py:rmtree docstring for more explanation on the |
| windows case. |
| """ |
| if sys.platform == 'win32': |
| lockfile = os.path.normcase(self.lockfile) |
| for _ in xrange(3): |
| exitcode = subprocess.call(['cmd.exe', '/c', |
| 'del', '/f', '/q', lockfile]) |
| if exitcode == 0: |
| return |
| time.sleep(3) |
| raise LockError('Failed to remove lock: %s' % lockfile) |
| else: |
| os.remove(self.lockfile) |
| |
| def lock(self): |
| """Acquire the lock. |
| |
| Note: This is a NON-BLOCKING FAIL-FAST operation. |
| Do. Or do not. There is no try. |
| """ |
| try: |
| self._make_lockfile() |
| except OSError as e: |
| if e.errno == errno.EEXIST: |
| raise LockError("%s is already locked" % self.path) |
| else: |
| raise LockError("Failed to create %s (err %s)" % (self.path, e.errno)) |
| |
| def unlock(self): |
| """Release the lock.""" |
| if not self.is_locked(): |
| raise LockError("%s is not locked" % self.path) |
| if not self.i_am_locking(): |
| raise LockError("%s is locked, but not by me" % self.path) |
| self._remove_lockfile() |
| |
| def break_lock(self): |
| """Remove the lock, even if it was created by someone else.""" |
| try: |
| self._remove_lockfile() |
| return True |
| except OSError as exc: |
| if exc.errno == errno.ENOENT: |
| return False |
| else: |
| raise |
| |
| def is_locked(self): |
| """Test if the file is locked by anyone. |
| |
| Note: This method is potentially racy. By the time it returns the lockfile |
| may have been unlocked, removed, or stolen by some other process. |
| """ |
| return os.path.exists(self.lockfile) |
| |
| def i_am_locking(self): |
| """Test if the file is locked by this process.""" |
| return self.is_locked() and self.pid == self._read_pid() |
| |
| def __enter__(self): |
| self.lock() |
| return self |
| |
| def __exit__(self, *_exc): |
| # Windows is unreliable when it comes to file locking. YMMV. |
| try: |
| self.unlock() |
| except WinErr: |
| pass |
| |
| |
| class Mirror(object): |
| |
| git_exe = 'git.bat' if sys.platform.startswith('win') else 'git' |
| gsutil_exe = os.path.join( |
| os.path.dirname(os.path.abspath(__file__)), |
| 'third_party', 'gsutil', 'gsutil') |
| bootstrap_bucket = 'chromium-git-cache' |
| |
| def __init__(self, url, refs=None, print_func=None): |
| self.url = url |
| self.refs = refs or [] |
| self.basedir = self.UrlToCacheDir(url) |
| self.mirror_path = os.path.join(self.GetCachePath(), self.basedir) |
| self.print = print_func or print |
| |
| @staticmethod |
| def UrlToCacheDir(url): |
| """Convert a git url to a normalized form for the cache dir path.""" |
| parsed = urlparse.urlparse(url) |
| norm_url = parsed.netloc + parsed.path |
| if norm_url.endswith('.git'): |
| norm_url = norm_url[:-len('.git')] |
| return norm_url.replace('-', '--').replace('/', '-').lower() |
| |
| @staticmethod |
| def FindExecutable(executable): |
| """This mimics the "which" utility.""" |
| path_folders = os.environ.get('PATH').split(os.pathsep) |
| |
| for path_folder in path_folders: |
| target = os.path.join(path_folder, executable) |
| # Just incase we have some ~/blah paths. |
| target = os.path.abspath(os.path.expanduser(target)) |
| if os.path.isfile(target) and os.access(target, os.X_OK): |
| return target |
| if sys.platform.startswith('win'): |
| for suffix in ('.bat', '.cmd', '.exe'): |
| alt_target = target + suffix |
| if os.path.isfile(alt_target) and os.access(alt_target, os.X_OK): |
| return alt_target |
| return None |
| |
| @classmethod |
| def SetCachePath(cls, cachepath): |
| setattr(cls, 'cachepath', cachepath) |
| |
| @classmethod |
| def GetCachePath(cls): |
| if not hasattr(cls, 'cachepath'): |
| try: |
| cachepath = subprocess.check_output( |
| [cls.git_exe, 'config', '--global', 'cache.cachepath']).strip() |
| except subprocess.CalledProcessError: |
| cachepath = None |
| if not cachepath: |
| raise RuntimeError('No global cache.cachepath git configuration found.') |
| setattr(cls, 'cachepath', cachepath) |
| return getattr(cls, 'cachepath') |
| |
| def RunGit(self, cmd, **kwargs): |
| """Run git in a subprocess.""" |
| cwd = kwargs.setdefault('cwd', self.mirror_path) |
| kwargs.setdefault('print_stdout', False) |
| kwargs.setdefault('filter_fn', self.print) |
| env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy()) |
| env.setdefault('GIT_ASKPASS', 'true') |
| env.setdefault('SSH_ASKPASS', 'true') |
| self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd)) |
| gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs) |
| |
| def config(self, cwd=None): |
| if cwd is None: |
| cwd = self.mirror_path |
| self.RunGit(['config', 'core.deltaBaseCacheLimit', |
| gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd) |
| self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd) |
| self.RunGit(['config', '--replace-all', 'remote.origin.fetch', |
| '+refs/heads/*:refs/heads/*'], cwd=cwd) |
| for ref in self.refs: |
| ref = ref.lstrip('+').rstrip('/') |
| if ref.startswith('refs/'): |
| refspec = '+%s:%s' % (ref, ref) |
| else: |
| refspec = '+refs/%s/*:refs/%s/*' % (ref, ref) |
| self.RunGit(['config', '--add', 'remote.origin.fetch', refspec], cwd=cwd) |
| |
| def bootstrap_repo(self, directory): |
| """Bootstrap the repo from Google Stroage if possible.""" |
| |
| python_fallback = False |
| if sys.platform.startswith('win') and not self.FindExecutable('7z'): |
| python_fallback = True |
| elif sys.platform.startswith('darwin'): |
| # The OSX version of unzip doesn't support zip64. |
| python_fallback = True |
| elif not self.FindExecutable('unzip'): |
| python_fallback = True |
| |
| gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir) |
| gsutil = Gsutil( |
| self.gsutil_exe, boto_path=os.devnull, bypass_prodaccess=True) |
| # Get the most recent version of the zipfile. |
| _, ls_out, _ = gsutil.check_call('ls', gs_folder) |
| ls_out_sorted = sorted(ls_out.splitlines()) |
| if not ls_out_sorted: |
| # This repo is not on Google Storage. |
| return False |
| latest_checkout = ls_out_sorted[-1] |
| |
| # Download zip file to a temporary directory. |
| try: |
| tempdir = tempfile.mkdtemp() |
| self.print('Downloading %s' % latest_checkout) |
| code, out, err = gsutil.check_call('cp', latest_checkout, tempdir) |
| if code: |
| self.print('%s\n%s' % (out, err)) |
| return False |
| filename = os.path.join(tempdir, latest_checkout.split('/')[-1]) |
| |
| # Unpack the file with 7z on Windows, unzip on linux, or fallback. |
| if not python_fallback: |
| if sys.platform.startswith('win'): |
| cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename] |
| else: |
| cmd = ['unzip', filename, '-d', directory] |
| retcode = subprocess.call(cmd) |
| else: |
| try: |
| with zipfile.ZipFile(filename, 'r') as f: |
| f.printdir() |
| f.extractall(directory) |
| except Exception as e: |
| self.print('Encountered error: %s' % str(e), file=sys.stderr) |
| retcode = 1 |
| else: |
| retcode = 0 |
| finally: |
| # Clean up the downloaded zipfile. |
| gclient_utils.rmtree(tempdir) |
| |
| if retcode: |
| self.print( |
| 'Extracting bootstrap zipfile %s failed.\n' |
| 'Resuming normal operations.' % filename) |
| return False |
| return True |
| |
| def exists(self): |
| return os.path.isfile(os.path.join(self.mirror_path, 'config')) |
| |
| def populate(self, depth=None, shallow=False, bootstrap=False, |
| verbose=False): |
| if shallow and not depth: |
| depth = 10000 |
| gclient_utils.safe_makedirs(self.GetCachePath()) |
| |
| v = [] |
| if verbose: |
| v = ['-v', '--progress'] |
| |
| d = [] |
| if depth: |
| d = ['--depth', str(depth)] |
| |
| |
| with Lockfile(self.mirror_path): |
| # Setup from scratch if the repo is new or is in a bad state. |
| tempdir = None |
| if not os.path.exists(os.path.join(self.mirror_path, 'config')): |
| gclient_utils.rmtree(self.mirror_path) |
| tempdir = tempfile.mkdtemp( |
| suffix=self.basedir, dir=self.GetCachePath()) |
| bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir) |
| if not bootstrapped: |
| self.RunGit(['init', '--bare'], cwd=tempdir) |
| else: |
| if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')): |
| logging.warn( |
| 'Shallow fetch requested, but repo cache already exists.') |
| d = [] |
| |
| rundir = tempdir or self.mirror_path |
| self.config(rundir) |
| fetch_cmd = ['fetch'] + v + d + ['origin'] |
| fetch_specs = subprocess.check_output( |
| [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'], |
| cwd=rundir).strip().splitlines() |
| for spec in fetch_specs: |
| try: |
| self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True) |
| except subprocess.CalledProcessError: |
| logging.warn('Fetch of %s failed' % spec) |
| if tempdir: |
| os.rename(tempdir, self.mirror_path) |
| |
| def update_bootstrap(self): |
| # The files are named <git number>.zip |
| gen_number = subprocess.check_output( |
| [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip() |
| self.RunGit(['gc']) # Run Garbage Collect to compress packfile. |
| # Creating a temp file and then deleting it ensures we can use this name. |
| _, tmp_zipfile = tempfile.mkstemp(suffix='.zip') |
| os.remove(tmp_zipfile) |
| subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path) |
| gsutil = Gsutil(path=self.gsutil_exe, boto_path=None) |
| dest_name = 'gs://%s/%s/%s.zip' % ( |
| self.bootstrap_bucket, self.basedir, gen_number) |
| gsutil.call('cp', tmp_zipfile, dest_name) |
| os.remove(tmp_zipfile) |
| |
| def unlock(self): |
| lf = Lockfile(self.mirror_path) |
| config_lock = os.path.join(self.mirror_path, 'config.lock') |
| if os.path.exists(config_lock): |
| os.remove(config_lock) |
| lf.break_lock() |
| |
| @subcommand.usage('[url of repo to check for caching]') |
| def CMDexists(parser, args): |
| """Check to see if there already is a cache of the given repo.""" |
| _, args = parser.parse_args(args) |
| if not len(args) == 1: |
| parser.error('git cache exists only takes exactly one repo url.') |
| url = args[0] |
| mirror = Mirror(url) |
| if mirror.exists(): |
| print(mirror.mirror_path) |
| return 0 |
| return 1 |
| |
| |
| @subcommand.usage('[url of repo to create a bootstrap zip file]') |
| def CMDupdate_bootstrap(parser, args): |
| """Create and uploads a bootstrap tarball.""" |
| # Lets just assert we can't do this on Windows. |
| if sys.platform.startswith('win'): |
| print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr) |
| return 1 |
| |
| # First, we need to ensure the cache is populated. |
| populate_args = args[:] |
| populate_args.append('--no_bootstrap') |
| CMDpopulate(parser, populate_args) |
| |
| # Get the repo directory. |
| _, args = parser.parse_args(args) |
| url = args[0] |
| mirror = Mirror(url) |
| mirror.update_bootstrap() |
| return 0 |
| |
| |
| @subcommand.usage('[url of repo to add to or update in cache]') |
| def CMDpopulate(parser, args): |
| """Ensure that the cache has all up-to-date objects for the given repo.""" |
| parser.add_option('--depth', type='int', |
| help='Only cache DEPTH commits of history') |
| parser.add_option('--shallow', '-s', action='store_true', |
| help='Only cache 10000 commits of history') |
| parser.add_option('--ref', action='append', |
| help='Specify additional refs to be fetched') |
| parser.add_option('--no_bootstrap', action='store_true', |
| help='Don\'t bootstrap from Google Storage') |
| |
| options, args = parser.parse_args(args) |
| if not len(args) == 1: |
| parser.error('git cache populate only takes exactly one repo url.') |
| url = args[0] |
| |
| mirror = Mirror(url, refs=options.ref) |
| kwargs = { |
| 'verbose': options.verbose, |
| 'shallow': options.shallow, |
| 'bootstrap': not options.no_bootstrap, |
| } |
| if options.depth: |
| kwargs['depth'] = options.depth |
| mirror.populate(**kwargs) |
| |
| |
| @subcommand.usage('[url of repo to unlock, or -a|--all]') |
| def CMDunlock(parser, args): |
| """Unlock one or all repos if their lock files are still around.""" |
| parser.add_option('--force', '-f', action='store_true', |
| help='Actually perform the action') |
| parser.add_option('--all', '-a', action='store_true', |
| help='Unlock all repository caches') |
| options, args = parser.parse_args(args) |
| if len(args) > 1 or (len(args) == 0 and not options.all): |
| parser.error('git cache unlock takes exactly one repo url, or --all') |
| |
| repo_dirs = [] |
| if not options.all: |
| url = args[0] |
| repo_dirs.append(Mirror(url).mirror_path) |
| else: |
| cachepath = Mirror.GetCachePath() |
| repo_dirs = [os.path.join(cachepath, path) |
| for path in os.listdir(cachepath) |
| if os.path.isdir(os.path.join(cachepath, path))] |
| repo_dirs.extend([os.path.join(cachepath, |
| lockfile.replace('.lock', '')) |
| for lockfile in os.listdir(cachepath) |
| if os.path.isfile(os.path.join(cachepath, |
| lockfile)) |
| and lockfile.endswith('.lock') |
| and os.path.join(cachepath, lockfile) |
| not in repo_dirs]) |
| lockfiles = [repo_dir + '.lock' for repo_dir in repo_dirs |
| if os.path.exists(repo_dir + '.lock')] |
| |
| if not options.force: |
| parser.error('git cache unlock requires -f|--force to do anything. ' |
| 'Refusing to unlock the following repo caches: ' |
| ', '.join(lockfiles)) |
| |
| unlocked_repos = [] |
| untouched_repos = [] |
| for repo_dir in repo_dirs: |
| lf = Lockfile(repo_dir) |
| config_lock = os.path.join(repo_dir, 'config.lock') |
| unlocked = False |
| if os.path.exists(config_lock): |
| os.remove(config_lock) |
| unlocked = True |
| if lf.break_lock(): |
| unlocked = True |
| |
| if unlocked: |
| unlocked_repos.append(repo_dir) |
| else: |
| untouched_repos.append(repo_dir) |
| |
| if unlocked_repos: |
| logging.info('Broke locks on these caches:\n %s' % '\n '.join( |
| unlocked_repos)) |
| if untouched_repos: |
| logging.debug('Did not touch these caches:\n %s' % '\n '.join( |
| untouched_repos)) |
| |
| |
| class OptionParser(optparse.OptionParser): |
| """Wrapper class for OptionParser to handle global options.""" |
| |
| def __init__(self, *args, **kwargs): |
| optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs) |
| self.add_option('-c', '--cache-dir', |
| help='Path to the directory containing the cache') |
| self.add_option('-v', '--verbose', action='count', default=0, |
| help='Increase verbosity (can be passed multiple times)') |
| |
| def parse_args(self, args=None, values=None): |
| options, args = optparse.OptionParser.parse_args(self, args, values) |
| |
| try: |
| global_cache_dir = Mirror.GetCachePath() |
| except RuntimeError: |
| global_cache_dir = None |
| if options.cache_dir: |
| if global_cache_dir and ( |
| os.path.abspath(options.cache_dir) != |
| os.path.abspath(global_cache_dir)): |
| logging.warn('Overriding globally-configured cache directory.') |
| Mirror.SetCachePath(options.cache_dir) |
| |
| levels = [logging.WARNING, logging.INFO, logging.DEBUG] |
| logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)]) |
| |
| return options, args |
| |
| |
| def main(argv): |
| dispatcher = subcommand.CommandDispatcher(__name__) |
| return dispatcher.execute(OptionParser(), argv) |
| |
| |
| if __name__ == '__main__': |
| sys.exit(main(sys.argv[1:])) |