#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""A git command for managing a local cache of git repositories."""
import errno
import logging
import optparse
import os
import subprocess
import sys
import tempfile
import urlparse
from download_from_google_storage import Gsutil
import gclient_utils
import subcommand
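# depot_tools wraps git in a batch file on Windows, hence git.bat there.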
GIT_EXECUTABLE = 'git.bat' if sys.platform.startswith('win') else 'git'
BOOTSTRAP_BUCKET = 'chromium-git-cache'
GSUTIL_DEFAULT_PATH = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
'third_party', 'gsutil', 'gsutil')
def UrlToCacheDir(url):
"""Convert a git url to a normalized form for the cache dir path."""
parsed = urlparse.urlparse(url)
norm_url = parsed.netloc + parsed.path
if norm_url.endswith('.git'):
norm_url = norm_url[:-len('.git')]
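  # Doubling existing dashes before mapping '/' to '-' keeps the result
  # unambiguous, e.g. 'https://chromium.googlesource.com/chromium/src.git'
  # becomes 'chromium.googlesource.com-chromium-src'.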
return norm_url.replace('-', '--').replace('/', '-').lower()
def RunGit(cmd, **kwargs):
"""Run git in a subprocess."""
kwargs.setdefault('cwd', os.getcwd())
if kwargs.get('filter_fn'):
kwargs['filter_fn'] = gclient_utils.GitFilter(kwargs.get('filter_fn'))
kwargs.setdefault('print_stdout', False)
env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
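    # Point the askpass helpers at 'true' so authentication prompts fail
    # immediately instead of hanging a non-interactive fetch.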
env.setdefault('GIT_ASKPASS', 'true')
env.setdefault('SSH_ASKPASS', 'true')
else:
kwargs.setdefault('print_stdout', True)
stdout = kwargs.get('stdout', sys.stdout)
print >> stdout, 'running "git %s" in "%s"' % (' '.join(cmd), kwargs['cwd'])
gclient_utils.CheckCallAndFilter([GIT_EXECUTABLE] + cmd, **kwargs)
class LockError(Exception):
pass
class Lockfile(object):
"""Class to represent a cross-platform process-specific lockfile."""
def __init__(self, path):
self.path = os.path.abspath(path)
self.lockfile = self.path + ".lock"
self.pid = os.getpid()
def _read_pid(self):
"""Read the pid stored in the lockfile.
Note: This method is potentially racy. By the time it returns the lockfile
may have been unlocked, removed, or stolen by some other process.
"""
try:
with open(self.lockfile, 'r') as f:
pid = int(f.readline().strip())
except (IOError, ValueError):
pid = None
return pid
def _make_lockfile(self):
"""Safely creates a lockfile containing the current pid."""
open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
fd = os.open(self.lockfile, open_flags, 0o644)
f = os.fdopen(fd, 'w')
print >> f, self.pid
f.close()
def _remove_lockfile(self):
"""Delete the lockfile. Complains (implicitly) if it doesn't exist."""
os.remove(self.lockfile)
def lock(self):
"""Acquire the lock.
Note: This is a NON-BLOCKING FAIL-FAST operation.
Do. Or do not. There is no try.
"""
try:
self._make_lockfile()
except OSError as e:
if e.errno == errno.EEXIST:
raise LockError("%s is already locked" % self.path)
else:
raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))
def unlock(self):
"""Release the lock."""
if not self.is_locked():
raise LockError("%s is not locked" % self.path)
if not self.i_am_locking():
raise LockError("%s is locked, but not by me" % self.path)
self._remove_lockfile()
def break_lock(self):
"""Remove the lock, even if it was created by someone else."""
try:
self._remove_lockfile()
return True
except OSError as exc:
if exc.errno == errno.ENOENT:
return False
else:
raise
def is_locked(self):
"""Test if the file is locked by anyone.
Note: This method is potentially racy. By the time it returns the lockfile
may have been unlocked, removed, or stolen by some other process.
"""
return os.path.exists(self.lockfile)
def i_am_locking(self):
"""Test if the file is locked by this process."""
return self.is_locked() and self.pid == self._read_pid()
def __enter__(self):
self.lock()
return self
def __exit__(self, *_exc):
self.unlock()
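# Illustrative usage: Lockfile is fail-fast, so callers should be ready to
# catch LockError, e.g.:
#
#   try:
#     with Lockfile(repo_dir):
#       pass  # safely mutate the cache under repo_dir
#   except LockError:
#     pass  # another process owns the lock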
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
"""Check to see if there already is a cache of the given repo."""
options, args = parser.parse_args(args)
  if len(args) != 1:
parser.error('git cache exists only takes exactly one repo url.')
url = args[0]
repo_dir = os.path.join(options.cache_dir, UrlToCacheDir(url))
flag_file = os.path.join(repo_dir, 'config')
if os.path.isdir(repo_dir) and os.path.isfile(flag_file):
print repo_dir
return 0
return 1
@subcommand.usage('[url of repo to create a bootstrap zip file for]')
def CMDupdate_bootstrap(parser, args):
"""Create and uploads a bootstrap tarball."""
# Lets just assert we can't do this on Windows.
if sys.platform.startswith('win'):
print >> sys.stderr, 'Sorry, update bootstrap will not work on Windows.'
return 1
# First, we need to ensure the cache is populated.
populate_args = args[:]
populate_args.append('--no_bootstrap')
CMDpopulate(parser, populate_args)
# Get the repo directory.
options, args = parser.parse_args(args)
url = args[0]
repo_dir = os.path.join(options.cache_dir, UrlToCacheDir(url))
# The files are named <git number>.zip
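  # ('git number' is depot_tools' commit generation-number tool, so newer
  # snapshots get higher numbers.)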
gen_number = subprocess.check_output(['git', 'number', 'master'],
cwd=repo_dir).strip()
  RunGit(['gc'], cwd=repo_dir)  # Run garbage collection to compress the packfiles.
# Creating a temp file and then deleting it ensures we can use this name.
_, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
os.remove(tmp_zipfile)
subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=repo_dir)
gsutil = Gsutil(path=GSUTIL_DEFAULT_PATH, boto_path=None)
dest_name = 'gs://%s/%s/%s.zip' % (BOOTSTRAP_BUCKET,
UrlToCacheDir(url),
gen_number)
gsutil.call('cp', tmp_zipfile, dest_name)
os.remove(tmp_zipfile)
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
"""Ensure that the cache has all up-to-date objects for the given repo."""
parser.add_option('--depth', type='int',
help='Only cache DEPTH commits of history')
parser.add_option('--shallow', '-s', action='store_true',
help='Only cache 10000 commits of history')
parser.add_option('--ref', action='append',
help='Specify additional refs to be fetched')
parser.add_option('--no_bootstrap', action='store_true',
help='Don\'t bootstrap from Google Storage')
options, args = parser.parse_args(args)
if options.shallow and not options.depth:
options.depth = 10000
  if len(args) != 1:
parser.error('git cache populate only takes exactly one repo url.')
url = args[0]
gclient_utils.safe_makedirs(options.cache_dir)
repo_dir = os.path.join(options.cache_dir, UrlToCacheDir(url))
v = []
filter_fn = lambda l: '[up to date]' not in l
if options.verbose:
v = ['-v', '--progress']
filter_fn = None
d = []
if options.depth:
d = ['--depth', '%d' % options.depth]
def _find(executable):
"""This mimics the "which" utility."""
    path_folders = os.environ.get('PATH', '').split(os.pathsep)
for path_folder in path_folders:
target = os.path.join(path_folder, executable)
      # Just in case we have some ~/blah paths.
target = os.path.abspath(os.path.expanduser(target))
if os.path.isfile(target) and os.access(target, os.X_OK):
return target
return False
def _maybe_bootstrap_repo(directory):
"""Bootstrap the repo from Google Stroage if possible.
Requires 7z on Windows and Unzip on Linux/Mac.
"""
if options.no_bootstrap:
return False
if sys.platform.startswith('win'):
if not _find('7z'):
print 'Cannot find 7z in the path.'
print 'If you want git cache to be able to bootstrap from '
print 'Google Storage, please install 7z from:'
print 'http://www.7-zip.org/download.html'
return False
else:
if not _find('unzip'):
print 'Cannot find unzip in the path.'
print 'If you want git cache to be able to bootstrap from '
print 'Google Storage, please ensure unzip is present on your system.'
return False
folder = UrlToCacheDir(url)
gs_folder = 'gs://%s/%s' % (BOOTSTRAP_BUCKET, folder)
gsutil = Gsutil(GSUTIL_DEFAULT_PATH, boto_path=os.devnull,
bypass_prodaccess=True)
    # Get the most recent version of the zipfile.
    _, ls_out, _ = gsutil.check_call('ls', gs_folder)
    # Sort numerically by generation number ('9999.zip' < '10000.zip').
    def _gen_key(gs_path):
      name = gs_path.rsplit('/', 1)[-1].split('.')[0]
      return int(name) if name.isdigit() else -1
    ls_out_sorted = sorted(ls_out.splitlines(), key=_gen_key)
if not ls_out_sorted:
# This repo is not on Google Storage.
return False
latest_checkout = ls_out_sorted[-1]
# Download zip file to a temporary directory.
tempdir = tempfile.mkdtemp()
print 'Downloading %s...' % latest_checkout
code, out, err = gsutil.check_call('cp', latest_checkout, tempdir)
if code:
print '%s\n%s' % (out, err)
return False
filename = os.path.join(tempdir, latest_checkout.split('/')[-1])
# Unpack the file with 7z on Windows, or unzip everywhere else.
if sys.platform.startswith('win'):
cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
else:
cmd = ['unzip', filename, '-d', directory]
retcode = subprocess.call(cmd)
# Clean up the downloaded zipfile.
gclient_utils.rmtree(tempdir)
if retcode:
print 'Extracting bootstrap zipfile %s failed.' % filename
print 'Resuming normal operations'
return False
return True
def _config(directory):
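    # core.deltaBaseCacheLimit controls how much memory git may use to
    # cache delta bases; gclient_utils picks a default suited to the host.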
RunGit(['config', 'core.deltaBaseCacheLimit',
gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=directory)
RunGit(['config', 'remote.origin.url', url],
cwd=directory)
RunGit(['config', '--replace-all', 'remote.origin.fetch',
'+refs/heads/*:refs/heads/*'],
cwd=directory)
RunGit(['config', '--add', 'remote.origin.fetch',
'+refs/tags/*:refs/tags/*'],
cwd=directory)
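    # Mirror any extra ref namespaces the caller requested (e.g.
    # 'branch-heads') so clients of the cache can fetch those too.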
for ref in options.ref or []:
ref = ref.rstrip('/')
refspec = '+refs/%s/*:refs/%s/*' % (ref, ref)
RunGit(['config', '--add', 'remote.origin.fetch', refspec],
cwd=directory)
with Lockfile(repo_dir):
    # Set up from scratch if the repo is new or is in a bad state.
if not os.path.exists(os.path.join(repo_dir, 'config')):
gclient_utils.rmtree(repo_dir)
tempdir = tempfile.mkdtemp(suffix=UrlToCacheDir(url),
dir=options.cache_dir)
bootstrapped = _maybe_bootstrap_repo(tempdir)
if not bootstrapped:
RunGit(['init', '--bare'], cwd=tempdir)
_config(tempdir)
fetch_cmd = ['fetch'] + v + d + ['origin']
RunGit(fetch_cmd, filter_fn=filter_fn, cwd=tempdir, retry=True)
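      # os.rename is atomic when source and destination are on the same
      # filesystem, which is why tempdir was created inside cache_dir.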
os.rename(tempdir, repo_dir)
else:
_config(repo_dir)
if options.depth and os.path.exists(os.path.join(repo_dir, 'shallow')):
logging.warn('Shallow fetch requested, but repo cache already exists.')
fetch_cmd = ['fetch'] + v + ['origin']
RunGit(fetch_cmd, filter_fn=filter_fn, cwd=repo_dir, retry=True)
@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
"""Unlock one or all repos if their lock files are still around."""
parser.add_option('--force', '-f', action='store_true',
help='Actually perform the action')
parser.add_option('--all', '-a', action='store_true',
help='Unlock all repository caches')
options, args = parser.parse_args(args)
if len(args) > 1 or (len(args) == 0 and not options.all):
parser.error('git cache unlock takes exactly one repo url, or --all')
if not options.all:
url = args[0]
repo_dirs = [os.path.join(options.cache_dir, UrlToCacheDir(url))]
else:
repo_dirs = [os.path.join(options.cache_dir, path)
for path in os.listdir(options.cache_dir)
if os.path.isdir(os.path.join(options.cache_dir, path))]
repo_dirs.extend([os.path.join(options.cache_dir,
lockfile.replace('.lock', ''))
for lockfile in os.listdir(options.cache_dir)
if os.path.isfile(os.path.join(options.cache_dir,
lockfile))
and lockfile.endswith('.lock')
and os.path.join(options.cache_dir, lockfile)
not in repo_dirs])
lockfiles = [repo_dir + '.lock' for repo_dir in repo_dirs
if os.path.exists(repo_dir + '.lock')]
if not options.force:
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: %s'
                 % ', '.join(lockfiles))
  unlocked_repos = []
  untouched_repos = []
  for repo_dir in repo_dirs:
    lf = Lockfile(repo_dir)
    config_lock = os.path.join(repo_dir, 'config.lock')
    did_unlock = False
    if os.path.exists(config_lock):
      os.remove(config_lock)
      did_unlock = True
    if lf.break_lock():
      did_unlock = True
    if did_unlock:
      unlocked_repos.append(repo_dir)
    else:
      untouched_repos.append(repo_dir)
  if unlocked_repos:
    logging.info('Broke locks on these caches: %s' % unlocked_repos)
  if untouched_repos:
    logging.debug('Did not touch these caches: %s' % untouched_repos)
class OptionParser(optparse.OptionParser):
"""Wrapper class for OptionParser to handle global options."""
def __init__(self, *args, **kwargs):
optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
self.add_option('-c', '--cache-dir',
help='Path to the directory containing the cache')
self.add_option('-v', '--verbose', action='count', default=0,
help='Increase verbosity (can be passed multiple times)')
def parse_args(self, args=None, values=None):
options, args = optparse.OptionParser.parse_args(self, args, values)
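    # The global git config value cache.cachepath supplies the default
    # cache directory; an explicit --cache-dir takes precedence.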
try:
global_cache_dir = subprocess.check_output(
[GIT_EXECUTABLE, 'config', '--global', 'cache.cachepath']).strip()
if options.cache_dir:
if global_cache_dir and (
os.path.abspath(options.cache_dir) !=
os.path.abspath(global_cache_dir)):
logging.warn('Overriding globally-configured cache directory.')
else:
options.cache_dir = global_cache_dir
except subprocess.CalledProcessError:
if not options.cache_dir:
self.error('No cache directory specified on command line '
'or in cache.cachepath.')
options.cache_dir = os.path.abspath(options.cache_dir)
levels = [logging.WARNING, logging.INFO, logging.DEBUG]
logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])
return options, args
def main(argv):
dispatcher = subcommand.CommandDispatcher(__name__)
return dispatcher.execute(OptionParser(), argv)
if __name__ == '__main__':
sys.exit(main(sys.argv[1:]))