# blob: 73a6785c036bddddc11de8c9e010aae5aad1dd23 [file] [log] [blame]
# Copyright (c) 2009, 2010, 2011 Google Inc. All rights reserved.
# Copyright (c) 2009 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import datetime
import logging
import re
from webkitpy.common.memoized import memoized
from webkitpy.common.system.executive import Executive, ScriptError
from webkitpy.common.system.filesystem import FileSystem
# Module-level logger, named after this module.
_log = logging.getLogger(__name__)
class Git(object):
    """Thin wrapper around the git command line for a single checkout.

    Every git invocation goes through an Executive and every path operation
    through a FileSystem; both can be injected through the constructor.

    Unless otherwise specified, methods are expected to return paths relative
    to self.checkout_root.
    """

    # Git doesn't appear to document error codes, but seems to return
    # 1 or 128, mostly.
    ERROR_FILE_IS_MISSING = 128

    executable_name = 'git'

    def __init__(self, cwd=None, executive=None, filesystem=None):
        """Locates the checkout to operate on.

        Args:
            cwd: Directory expected to be inside a git work tree. Defaults to
                the absolute current working directory.
            executive: Object used to run commands; defaults to Executive().
            filesystem: Object used for path handling; defaults to
                FileSystem().
        """
        self._executive = executive or Executive()
        self._filesystem = filesystem or FileSystem()

        self.cwd = cwd or self._filesystem.abspath(self._filesystem.getcwd())
        if not Git.in_working_directory(self.cwd, executive=self._executive):
            # Fall back to the directory containing this module.
            module_directory = self._filesystem.abspath(
                self._filesystem.dirname(self._filesystem.path_to_module(self.__module__)))
            _log.info('The current directory (%s) is not in a git repo, trying directory %s.',
                      self.cwd, module_directory)
            if Git.in_working_directory(module_directory, executive=self._executive):
                self.cwd = module_directory
            else:
                # Only report a failure when the fallback did not work either.
                _log.error('Failed to find Git repo for %s or %s', self.cwd, module_directory)

        self._init_executable_name()
        self.checkout_root = self.find_checkout_root(self.cwd)

    def _init_executable_name(self):
        """Switches executable_name to 'git.bat' when plain 'git' cannot be run."""
        # FIXME: This is a hack and should be removed.
        try:
            self._executive.run_command(['git', 'help'])
        except OSError:
            try:
                self._executive.run_command(['git.bat', 'help'])
                # The Win port uses the depot_tools package, which contains a number
                # of development tools, including Python and git. Instead of using a
                # real git executable, depot_tools indirects via a batch file, called
                # "git.bat". This batch file allows depot_tools to auto-update the real
                # git executable, which is contained in a subdirectory.
                _log.debug('Engaging git.bat Windows hack.')
                self.executable_name = 'git.bat'
            except OSError:
                _log.debug('Failed to engage git.bat Windows hack.')

    def _run_git(self,
                 command_args,
                 cwd=None,
                 input=None,  # pylint: disable=redefined-builtin
                 timeout_seconds=None,
                 decode_output=True,
                 return_exit_code=False):
        """Runs git with the given arguments through the executive.

        Args:
            command_args: List of arguments following the git executable name.
            cwd: Directory to run in; defaults to self.checkout_root.
            input: Forwarded to the executive's run_command.
            timeout_seconds: Forwarded to the executive's run_command.
            decode_output: Forwarded to the executive's run_command.
            return_exit_code: Forwarded to the executive's run_command.

        Returns:
            Whatever the executive's run_command returns.
        """
        full_command_args = [self.executable_name] + command_args
        cwd = cwd or self.checkout_root
        return self._executive.run_command(
            full_command_args,
            cwd=cwd,
            input=input,
            timeout_seconds=timeout_seconds,
            return_exit_code=return_exit_code,
            decode_output=decode_output)

    # SCM always returns repository relative path, but sometimes we need
    # absolute paths to pass to rm, etc.
    def absolute_path(self, repository_relative_path):
        """Joins a repository-relative path onto self.checkout_root."""
        return self._filesystem.join(self.checkout_root, repository_relative_path)

    @classmethod
    def in_working_directory(cls, path, executive=None):
        """Returns True if `git rev-parse --is-inside-work-tree` prints 'true' in path.

        Returns False when running the command raises OSError.
        """
        try:
            executive = executive or Executive()
            return executive.run_command([cls.executable_name, 'rev-parse', '--is-inside-work-tree'],
                                         cwd=path, error_handler=Executive.ignore_error).rstrip() == 'true'
        except OSError:
            # The Windows bots seem to throw a WindowsError when git isn't installed.
            return False

    def find_checkout_root(self, path):
        """Returns the top-level directory of the checkout containing path."""
        # "git rev-parse --show-cdup" would be another way to get to the root
        checkout_root = self._run_git(['rev-parse', '--show-toplevel'], cwd=(path or './')).strip()
        if not self._filesystem.isabs(checkout_root):  # Sometimes git returns relative paths
            checkout_root = self._filesystem.join(path, checkout_root)
        return checkout_root

    @classmethod
    def read_git_config(cls, key, cwd=None, executive=None):
        """Returns the output of `git config --get-all key`, minus trailing newlines.

        Command errors are passed to Executive.ignore_error.
        """
        # FIXME: This should probably use cwd=self.checkout_root.
        # Pass --get-all for cases where the config has multiple values
        # Pass the cwd if provided so that we can handle the case of running webkit-patch outside of the working directory.
        executive = executive or Executive()
        return executive.run_command(
            [cls.executable_name, 'config', '--get-all', key], error_handler=Executive.ignore_error, cwd=cwd).rstrip('\n')

    def _discard_local_commits(self):
        """Hard-resets the current branch to the remote branch ref."""
        self._run_git(['reset', '--hard', self._remote_branch_ref()])

    def _local_commits(self, ref='HEAD'):
        """Returns one-line log entries for the symmetric difference of ref and the remote branch ref."""
        return self._run_git(['log', '--pretty=oneline', ref + '...' + self._remote_branch_ref()]).splitlines()

    def _rebase_in_progress(self):
        """Returns True if .git/rebase-apply exists in the checkout."""
        return self._filesystem.exists(self.absolute_path(self._filesystem.join('.git', 'rebase-apply')))

    def has_working_directory_changes(self, pathspec=None):
        """Checks whether there are uncommitted changes."""
        command = ['diff', 'HEAD', '--no-renames', '--name-only']
        if pathspec:
            command.extend(['--', pathspec])
        return self._run_git(command) != ''

    def _discard_working_directory_changes(self):
        """Hard-resets to HEAD and aborts a rebase if one is in progress."""
        # Could run git clean here too, but that wouldn't match subversion
        self._run_git(['reset', 'HEAD', '--hard'])
        # Aborting rebase even though this does not match subversion
        if self._rebase_in_progress():
            self._run_git(['rebase', '--abort'])

    def unstaged_changes(self):
        """Lists files with unstaged changes, including untracked files.

        Returns a dict mapping modified file paths (relative to checkout root)
        to one-character codes identifying the change, e.g. 'M' for modified,
        'D' for deleted, '?' for untracked.
        """
        # `git status -z` is a version of `git status -s`, that's recommended
        # for machine parsing. Lines are terminated with NUL rather than LF.
        change_lines = self._run_git(['status', '-z', '--untracked-files=all']).rstrip('\x00')
        if not change_lines:
            return {}  # No changes.

        unstaged_changes = {}
        entries = iter(change_lines.split('\x00'))
        for line in entries:
            assert len(line) >= 4, 'Unexpected change line format %s' % line
            if 'R' in line[:2] or 'C' in line[:2]:
                # In -z output a rename/copy entry is "XY to\0from\0": the
                # field after this one is the source path, not a status line.
                next(entries, None)
            if line[1] == ' ':
                continue  # Already staged for commit.
            unstaged_changes[line[3:]] = line[1]
        return unstaged_changes

    def add_list(self, paths, return_exit_code=False):
        """Runs `git add` on the given list of paths."""
        return self._run_git(['add'] + paths, return_exit_code=return_exit_code)

    def delete_list(self, paths):
        """Runs `git rm -f` on the given list of paths."""
        return self._run_git(['rm', '-f'] + paths)

    def move(self, origin, destination):
        """Runs `git mv -f origin destination`."""
        return self._run_git(['mv', '-f', origin, destination])

    def exists(self, path):
        """Returns False only if `git show HEAD:<path>` exits with ERROR_FILE_IS_MISSING."""
        return_code = self._run_git(['show', 'HEAD:%s' % path], return_exit_code=True, decode_output=False)
        return return_code != self.ERROR_FILE_IS_MISSING

    def _branch_from_ref(self, ref):
        """Removes 'refs/heads/' from ref."""
        return ref.replace('refs/heads/', '')

    def current_branch(self):
        """Returns the name of the current branch, or empty string if HEAD is detached."""
        ref = self._run_git(['rev-parse', '--symbolic-full-name', 'HEAD']).strip()
        if ref == 'HEAD':
            # HEAD is detached; return an empty string.
            return ''
        return self._branch_from_ref(ref)

    def current_branch_or_ref(self):
        """Returns the name of the current branch, or the commit hash if HEAD is detached."""
        branch_name = self.current_branch()
        if not branch_name:
            # HEAD is detached; use commit SHA instead.
            return self._run_git(['rev-parse', 'HEAD']).strip()
        return branch_name

    def _upstream_branch(self):
        """Returns the branch.<current>.merge config value with 'refs/heads/' removed."""
        current_branch = self.current_branch()
        return self._branch_from_ref(self.read_git_config(
            'branch.%s.merge' % current_branch, cwd=self.checkout_root, executive=self._executive).strip())

    def _merge_base(self, git_commit=None):
        """Turns git_commit into a revision argument for `git diff`.

        Without git_commit, returns the merge base of HEAD and the remote
        branch ref.

        Raises:
            ScriptError: git_commit mentions UPSTREAM but no upstream branch
                is configured.
        """
        if git_commit:
            # Rewrite UPSTREAM to the upstream branch
            if 'UPSTREAM' in git_commit:
                upstream = self._upstream_branch()
                if not upstream:
                    raise ScriptError(message='No upstream/tracking branch set.')
                git_commit = git_commit.replace('UPSTREAM', upstream)

            # Special-case <refname>.. to include working copy changes, e.g., 'HEAD....' shows only the diffs from HEAD.
            if git_commit.endswith('....'):
                return git_commit[:-4]

            if '..' not in git_commit:
                # A single commit means "the change introduced by that commit".
                git_commit = git_commit + '^..' + git_commit
            return git_commit

        return self._remote_merge_base()

    def changed_files(self, git_commit=None, diff_filter='ADM'):
        """Returns paths whose diff status letter is one of diff_filter."""
        # FIXME: --diff-filter could be used to avoid the "extract_filenames" step.
        status_command = ['diff', '-r', '--name-status',
                          '--no-renames', '--no-ext-diff', '--full-index', self._merge_base(git_commit)]
        # FIXME: I'm not sure we're returning the same set of files that SVN.changed_files is.
        # Added (A), Copied (C), Deleted (D), Modified (M), Renamed (R)
        return self._run_status_and_extract_filenames(status_command, self._status_regexp(diff_filter))

    def added_files(self):
        """Returns paths reported with status 'A' by status_command()."""
        return self._run_status_and_extract_filenames(self.status_command(), self._status_regexp('A'))

    def _run_status_and_extract_filenames(self, status_command, status_regexp):
        """Runs status_command and returns the filenames of lines matching status_regexp."""
        filenames = []
        # We run with cwd=self.checkout_root so that returned-paths are root-relative.
        for line in self._run_git(status_command, cwd=self.checkout_root).splitlines():
            match = re.search(status_regexp, line)
            if not match:
                continue
            filenames.append(match.group('filename'))
        return filenames

    def status_command(self):
        """Returns the git arguments used to list changed files with their status."""
        # git status returns non-zero when there are changes, so we use git diff name --name-status HEAD instead.
        # No file contents printed, thus utf-8 autodecoding in self.run is fine.
        return ['diff', '--name-status', '--no-renames', 'HEAD']

    def _status_regexp(self, expected_types):
        """Returns a regex matching '<status>\\t<filename>' lines for the given status letters."""
        return '^(?P<status>[%s])\t(?P<filename>.+)$' % expected_types

    @staticmethod
    def supports_local_commits():
        """Always returns True."""
        return True

    def display_name(self):
        """Returns 'git'."""
        return 'git'

    def most_recent_log_matching(self, grep_str, path):
        """Returns the newest log entry (ISO dates) whose message matches grep_str."""
        # We use '--grep=' + foo rather than '--grep', foo because
        # git 1.7.0.4 (and earlier) didn't support the separate arg.
        return self._run_git(['log', '-1', '--grep=' + grep_str, '--date=iso', self.find_checkout_root(path)])

    def _commit_position_from_git_log(self, git_log):
        """Returns the Cr-Commit-Position number in git_log as an int, or '' if absent."""
        match = re.search(r"^\s*Cr-Commit-Position:.*@\{#(?P<commit_position>\d+)\}", git_log, re.MULTILINE)
        if not match:
            return ''
        return int(match.group('commit_position'))

    def commit_position(self, path):
        """Returns the latest chromium commit position found in the checkout."""
        git_log = self.most_recent_log_matching('Cr-Commit-Position:', path)
        return self._commit_position_from_git_log(git_log)

    def _commit_position_regex_for_timestamp(self):
        """Returns a %-format template for grepping a specific commit position."""
        return 'Cr-Commit-Position:.*@{#%s}'

    def timestamp_of_revision(self, path, revision):
        """Returns the commit date of a commit position, converted to UTC.

        Returns:
            A string formatted as '%Y-%m-%dT%H:%M:%SZ', or '' if the log output
            has no parsable Date line.
        """
        git_log = self.most_recent_log_matching(self._commit_position_regex_for_timestamp() % revision, path)
        match = re.search(r"^Date:\s*(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) ([+-])(\d{2})(\d{2})$", git_log, re.MULTILINE)
        if not match:
            return ''

        # Manually modify the timezone since Git doesn't have an option to show it in UTC.
        # Git also truncates milliseconds but we're going to ignore that for now.
        time_with_timezone = datetime.datetime(int(match.group(1)), int(match.group(2)), int(match.group(3)),
                                               int(match.group(4)), int(match.group(5)), int(match.group(6)), 0)

        # The sign applies to the whole offset: -0330 is -(3h30m), not -3h+30m.
        sign = 1 if match.group(7) == '+' else -1
        utc_offset = sign * datetime.timedelta(hours=int(match.group(8)), minutes=int(match.group(9)))
        time_without_timezone = time_with_timezone - utc_offset
        return time_without_timezone.strftime('%Y-%m-%dT%H:%M:%SZ')

    def create_patch(self, git_commit=None, changed_files=None):
        """Returns a byte array (str()) representing the patch file.

        Patch files are effectively binary since they may contain
        files of multiple different encodings.
        """
        order = self._patch_order()
        command = [
            'diff',
            '--binary',
            '--no-color',
            '--no-ext-diff',
            '--full-index',
            '--no-renames',
            '--src-prefix=a/',
            '--dst-prefix=b/',
        ]
        if order:
            command.append(order)
        command += [self._merge_base(git_commit), '--']
        if changed_files:
            command += changed_files
        return self._run_git(command, decode_output=False, cwd=self.checkout_root)

    def _patch_order(self):
        """Returns a '-O<orderfile>' diff argument, or '' if the orderfile does not exist."""
        # Put code changes at the top of the patch and layout tests
        # at the bottom, this makes for easier reviewing.
        config_path = self._filesystem.dirname(self._filesystem.path_to_module('webkitpy.common.config'))
        order_file = self._filesystem.join(config_path, 'orderfile')
        if self._filesystem.exists(order_file):
            return '-O%s' % order_file
        return ''

    @memoized
    def commit_position_from_git_commit(self, git_commit):
        """Returns the commit position recorded in git_commit's log message."""
        git_log = self.git_commit_detail(git_commit)
        return self._commit_position_from_git_log(git_log)

    def checkout_branch(self, name):
        """Runs `git checkout -q name`."""
        self._run_git(['checkout', '-q', name])

    def create_clean_branch(self, name):
        """Creates and checks out branch `name` at the remote branch ref."""
        self._run_git(['checkout', '-q', '-b', name, self._remote_branch_ref()])

    def blame(self, path):
        """Returns the output of `git blame --show-email path`."""
        return self._run_git(['blame', '--show-email', path])

    # Git-specific methods:
    def _branch_ref_exists(self, branch_ref):
        """Returns True if `git show-ref --verify branch_ref` exits with 0."""
        return self._run_git(['show-ref', '--quiet', '--verify', branch_ref], return_exit_code=True) == 0

    def delete_branch(self, branch_name):
        """Force-deletes the local branch if refs/heads/<branch_name> exists."""
        if self._branch_ref_exists('refs/heads/' + branch_name):
            self._run_git(['branch', '-D', branch_name])

    def _remote_merge_base(self):
        """Returns the merge base of the remote branch ref and HEAD."""
        return self._run_git(['merge-base', self._remote_branch_ref(), 'HEAD']).strip()

    def _remote_branch_ref(self):
        """Returns 'refs/remotes/origin/master'.

        Raises:
            ScriptError: that ref does not exist.
        """
        # Use references so that we can avoid collisions, e.g. we don't want to operate on refs/heads/trunk if it exists.
        remote_master_ref = 'refs/remotes/origin/master'
        if not self._branch_ref_exists(remote_master_ref):
            raise ScriptError(message="Can't find a branch to diff against. %s does not exist" % remote_master_ref)
        return remote_master_ref

    def commit_locally_with_message(self, message):
        """Runs `git commit --all -F -`, feeding message as input."""
        command = ['commit', '--all', '-F', '-']
        self._run_git(command, input=message)

    def pull(self, timeout_seconds=None):
        """Runs `git pull`."""
        self._run_git(['pull'], timeout_seconds=timeout_seconds)

    def latest_git_commit(self):
        """Returns the full hash printed by `git log -1 --format=%H`."""
        return self._run_git(['log', '-1', '--format=%H']).strip()

    def git_commits_since(self, commit):
        """Returns hashes in commit..master, in the order produced by --reverse."""
        return self._run_git(['log', commit + '..master', '--format=%H', '--reverse']).split()

    def git_commit_detail(self, commit, format=None):  # pylint: disable=redefined-builtin
        """Returns `git log -1` output for commit, optionally with a --format."""
        args = ['log', '-1', commit]
        if format:
            args.append('--format=' + format)
        return self._run_git(args)

    def affected_files(self, commit):
        """Returns the list of file names touched by commit (empty if none are listed)."""
        output = self._run_git(['log', '-1', '--format=', '--name-only', commit])
        # Filter out blanks so that empty output yields [] rather than [''].
        return [name for name in output.strip().split('\n') if name]

    def _branch_tracking_remote_master(self):
        """Returns the local branch that `git remote show origin -n` reports as merging with remote master.

        Raises:
            ScriptError: no such branch is listed.
        """
        origin_info = self._run_git(['remote', 'show', 'origin', '-n'])
        match = re.search(r"^\s*(?P<branch_name>\S+)\s+merges with remote master$", origin_info, re.MULTILINE)
        if not match:
            raise ScriptError(message='Unable to find local branch tracking origin/master.')
        branch = str(match.group('branch_name'))
        return self._branch_from_ref(self._run_git(['rev-parse', '--symbolic-full-name', branch]).strip())

    def is_cleanly_tracking_remote_master(self):
        """Returns True if there are no uncommitted changes, the current branch
        is the one tracking remote master, and it has no local commits.
        """
        if self.has_working_directory_changes():
            return False
        current_branch = self.current_branch()
        # Resolve the tracking branch once; each lookup runs git twice.
        tracking_branch = self._branch_tracking_remote_master()
        if current_branch != tracking_branch:
            return False
        return not self._local_commits(tracking_branch)

    def ensure_cleanly_tracking_remote_master(self):
        """Discards local changes and commits, leaving the tracking branch checked out."""
        self._discard_working_directory_changes()
        self._run_git(['checkout', '-q', self._branch_tracking_remote_master()])
        self._discard_local_commits()