GSUtil.py wrapper script

This is for transitioning us out of the horribly outdated gsutil 3.4 that's
currently residing in depot_tools.  This script:
1. Downloads GSUtil, pinned to a version (4.6 by default).
2. Executes GSUtil with the given parameters
3. (TODO) Runs the fallback GSUtil if #2 fails.


The transition plan is:
1. Set the fallback as default, making this script a no-op
2. Modify recipes (main consumer of gsutil) to pass in --force-version 4.6
3. Switch to 4.6 as the default for gsutil.py
4. Make fallback failure a loud red failure
5. Remove depot_tools/third_party/gsutil

BUG=434422

Review URL: https://codereview.chromium.org/742173002

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@293320 0039d316-1c4b-4281-b951-d872f2087c98
diff --git a/.gitignore b/.gitignore
index 395a7ff..4db19bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,6 +24,7 @@
 /git_bin
 /git-*_bin
 /svn_bin
+/external_bin
 /win_toolchain/vs2013_files
 /win_toolchain/.timestamps
 /win_toolchain/.vspro
diff --git a/gsutil.py b/gsutil.py
new file mode 100755
index 0000000..e3f50d4
--- /dev/null
+++ b/gsutil.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Run a pinned gsutil."""
+
+
+import argparse
+import shutil
+import zipfile
+import hashlib
+import base64
+import os
+import sys
+import json
+import urllib
+import subprocess
+
+
+GSUTIL_URL = 'https://storage.googleapis.com/pub/'  # Base URL of the zips.
+API_URL = 'https://www.googleapis.com/storage/v1/b/pub/o/'  # Object metadata.
+
+THIS_DIR = os.path.dirname(os.path.abspath(__file__))
+DEFAULT_BIN_DIR = os.path.join(THIS_DIR, 'external_bin', 'gsutil')  # --target
+DEFAULT_FALLBACK_GSUTIL = os.path.join(
+    THIS_DIR, 'third_party', 'gsutil', 'gsutil')  # --fallback
+
+
+class SubprocessError(Exception):
+  """Raised by call() when the child process exits with a non-zero code."""
+
+
+class InvalidGsutilError(Exception):
+  """Raised by ensure_gsutil() when a freshly unpacked gsutil fails to run."""
+
+
+def call(args, verbose=True, **kwargs):
+  """Runs args and returns its output; raises SubprocessError on failure."""
+  kwargs.update(stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+  proc = subprocess.Popen(args, **kwargs)
+  captured = []
+  for line in proc.stdout:
+    captured.append(line)
+    if verbose:
+      sys.stdout.write(line)
+  exit_code = proc.wait()
+  if exit_code:
+    raise SubprocessError('%s failed with %s' % (args, exit_code))
+  return ''.join(captured)
+
+
+def download_gsutil(version, target_dir):
+  """Downloads gsutil_<version>.zip into target_dir and returns its path.
+
+  If the zip is already there, its MD5 is compared with the 'md5Hash' field of
+  the object metadata fetched from API_URL; a matching file is reused and a
+  mismatching one is removed and downloaded again.
+  """
+  filename = 'gsutil_%s.zip' % version
+  target_filename = os.path.join(target_dir, filename)
+
+  # Check if the target exists already.
+  if os.path.exists(target_filename):
+    md5_calc = hashlib.md5()
+    with open(target_filename, 'rb') as f:
+      for buf in iter(lambda: f.read(4096), b''):
+        md5_calc.update(buf)
+
+    metadata_url = '%s%s' % (API_URL, filename)
+    metadata = json.load(urllib.urlopen(metadata_url))
+    remote_md5 = base64.b64decode(metadata['md5Hash'])
+
+    # GCS documents md5Hash as base64 of the raw digest. The hex form is also
+    # accepted so a server/fake that encoded the hex digest keeps matching.
+    if remote_md5 in (md5_calc.digest(), md5_calc.hexdigest()):
+      return target_filename
+    os.remove(target_filename)
+
+  # Do the download.
+  url = '%s%s' % (GSUTIL_URL, filename)
+  u = urllib.urlopen(url)
+  with open(target_filename, 'wb') as f:
+    for buf in iter(lambda: u.read(4096), b''):
+      f.write(buf)
+  return target_filename
+
+
+def check_gsutil(gsutil_bin):
+  """Reports whether `<python> gsutil_bin version` exits successfully."""
+  try:
+    call([sys.executable, gsutil_bin, 'version'], verbose=False)
+  except SubprocessError:
+    return False
+  return True
+
+
+def ensure_gsutil(version, target):
+  """Returns the path to a working gsutil <version>, fetching it if needed.
+
+  Raises InvalidGsutilError if the freshly unpacked copy fails its check.
+  """
+  bin_dir = os.path.join(target, 'gsutil_%s' % version)
+  gsutil_bin = os.path.join(bin_dir, 'gsutil', 'gsutil')
+  if os.path.isfile(gsutil_bin) and check_gsutil(gsutil_bin):
+    return gsutil_bin  # Already installed and runnable.
+
+  if os.path.isdir(bin_dir):
+    shutil.rmtree(bin_dir)  # Stale or corrupted install; start clean.
+  cache_dir = os.path.join(target, '.cache_dir')
+  if not os.path.isdir(cache_dir):
+    os.makedirs(cache_dir)
+  zip_path = download_gsutil(version, cache_dir)
+  with zipfile.ZipFile(zip_path, 'r') as archive:
+    archive.extractall(bin_dir)
+
+  if check_gsutil(gsutil_bin):
+    return gsutil_bin
+  raise InvalidGsutilError()
+
+
+def run_gsutil(force_version, fallback, target, args):
+  """Runs gsutil with args, using the pinned version when one is forced."""
+  gsutil_bin = fallback
+  if force_version:
+    gsutil_bin = ensure_gsutil(force_version, target)
+  cmd = [sys.executable, gsutil_bin] + args
+  call(cmd)
+
+
+def parse_args():
+  """Parses sys.argv into (force_version, fallback, target, gsutil args)."""
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--force-version')
+  parser.add_argument('--fallback', default=DEFAULT_FALLBACK_GSUTIL)
+  parser.add_argument('--target', default=DEFAULT_BIN_DIR)
+  parser.add_argument('args', nargs=argparse.REMAINDER)
+  opts = parser.parse_args()
+  return opts.force_version, opts.fallback, opts.target, opts.args
+
+
+def main():
+  """Entry point: parses the command line and runs gsutil with it."""
+  run_gsutil(*parse_args())
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/tests/gsutil_test.py b/tests/gsutil_test.py
new file mode 100755
index 0000000..f815243
--- /dev/null
+++ b/tests/gsutil_test.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Test gsutil.py."""
+
+
+import __builtin__
+import unittest
+import hashlib
+import zipfile
+import shutil
+import sys
+import base64
+import tempfile
+import json
+import os
+import urllib
+
+
+# Add depot_tools to path
+THIS_DIR = os.path.dirname(os.path.abspath(__file__))
+DEPOT_TOOLS_DIR = os.path.dirname(THIS_DIR)
+sys.path.append(DEPOT_TOOLS_DIR)
+
+import gsutil
+
+
+class TestError(Exception):
+  """Raised by FakeCall when a call is unexpected or does not match."""
+
+
+class Buffer(object):
+  """Minimal in-memory file-like object standing in for urlopen() results."""
+
+  def __init__(self, data=None):
+    self.data = data or ''
+
+  def write(self, buf):
+    self.data += buf
+
+  def read(self, amount=None):
+    count = amount or len(self.data)
+    head, self.data = self.data[:count], self.data[count:]
+    return head
+
+
+class FakeCall(object):
+  """Callable fake that checks each call against a queue of expectations."""
+  def __init__(self):
+    self.expectations = []
+
+  def add_expectation(self, *args, **kwargs):
+    self.expectations.append((args, kwargs, kwargs.pop('_returns', None)))
+
+  def __call__(self, *args, **kwargs):
+    if not self.expectations:
+      raise TestError('Got unexpected\n%s\n%s' % (args, kwargs))
+    want_args, want_kwargs, result = self.expectations.pop(0)
+    if args != want_args or kwargs != want_kwargs:
+      raise TestError(
+          'Expected:\n  args: %s\n  kwargs: %s\n' % (want_args, want_kwargs) +
+          'Got:\n  args: %s\n  kwargs: %s\n' % (args, kwargs))
+    if isinstance(result, Exception):
+      raise result
+    return result
+
+
+class GsutilUnitTests(unittest.TestCase):
+  """Tests gsutil.py with urllib.urlopen and gsutil.call swapped for a fake."""
+
+  def setUp(self):
+    self.fake = FakeCall()
+    self.tempdir = tempfile.mkdtemp()
+    self.old_urlopen = urllib.urlopen
+    self.old_call = gsutil.call
+    urllib.urlopen = self.fake
+    gsutil.call = self.fake
+
+  def tearDown(self):
+    # Undo the patching first so a failing assertion below cannot leak it.
+    urllib.urlopen = self.old_urlopen
+    gsutil.call = self.old_call
+    shutil.rmtree(self.tempdir)
+    self.assertEqual(self.fake.expectations, [])
+
+  def test_download_gsutil(self):
+    version = '4.2'
+    filename = 'gsutil_%s.zip' % version
+    full_filename = os.path.join(self.tempdir, filename)
+    fake_file = 'This is gsutil.zip'
+    fake_file2 = 'This is other gsutil.zip'
+    url = '%s%s' % (gsutil.GSUTIL_URL, filename)
+    metadata_url = gsutil.API_URL + filename
+
+    # No cached zip yet: the file is downloaded.
+    self.fake.add_expectation(url, _returns=Buffer(fake_file))
+    self.assertEqual(
+        gsutil.download_gsutil(version, self.tempdir), full_filename)
+    with open(full_filename, 'r') as f:
+      self.assertEqual(fake_file, f.read())
+
+    # Cached zip whose MD5 matches the metadata: only the metadata is fetched.
+    b64_md5 = base64.b64encode(hashlib.md5(fake_file).hexdigest())
+    self.fake.add_expectation(
+        metadata_url, _returns=Buffer(json.dumps({'md5Hash': b64_md5})))
+    self.assertEqual(
+        gsutil.download_gsutil(version, self.tempdir), full_filename)
+    with open(full_filename, 'r') as f:
+      self.assertEqual(fake_file, f.read())
+    self.assertEqual(self.fake.expectations, [])
+
+    # Cached zip whose MD5 does not match: it is replaced by a fresh download.
+    bad_md5 = base64.b64encode('aaaaaaa')
+    self.fake.add_expectation(
+        metadata_url, _returns=Buffer(json.dumps({'md5Hash': bad_md5})))
+    self.fake.add_expectation(url, _returns=Buffer(fake_file2))
+    self.assertEqual(
+        gsutil.download_gsutil(version, self.tempdir), full_filename)
+    with open(full_filename, 'r') as f:
+      self.assertEqual(fake_file2, f.read())
+    self.assertEqual(self.fake.expectations, [])
+
+  def test_ensure_gsutil_full(self):
+    version = '4.2'
+    gsutil_dir = os.path.join(self.tempdir, 'gsutil_%s' % version, 'gsutil')
+    gsutil_bin = os.path.join(gsutil_dir, 'gsutil')
+    check_cmd = [sys.executable, gsutil_bin, 'version']
+    os.makedirs(gsutil_dir)
+    with open(gsutil_bin, 'w') as f:
+      f.write('Foobar')
+
+    # Both the stale install and the freshly unpacked copy fail their check.
+    self.fake.add_expectation(
+        check_cmd, verbose=False, _returns=gsutil.SubprocessError())
+    url = '%sgsutil_%s.zip' % (gsutil.GSUTIL_URL, version)
+    # Built inside self.tempdir so tearDown() removes it; mkstemp() would leak.
+    tempzip = os.path.join(self.tempdir, 'fake_gsutil.zip')
+    fake_gsutil = 'Fake gsutil'
+    with zipfile.ZipFile(tempzip, 'w') as zf:
+      zf.writestr('gsutil/gsutil', fake_gsutil)
+    with open(tempzip, 'rb') as f:
+      self.fake.add_expectation(url, _returns=Buffer(f.read()))
+    self.fake.add_expectation(
+        check_cmd, verbose=False, _returns=gsutil.SubprocessError())
+
+    # This should delete the old bin and rewrite it with 'Fake gsutil'.
+    self.assertRaises(
+        gsutil.InvalidGsutilError, gsutil.ensure_gsutil, version, self.tempdir)
+    self.assertTrue(os.path.isdir(os.path.join(self.tempdir, '.cache_dir')))
+    self.assertTrue(os.path.exists(gsutil_bin))
+    with open(gsutil_bin, 'r') as f:
+      self.assertEqual(f.read(), fake_gsutil)
+    self.assertEqual(self.fake.expectations, [])
+
+  def test_ensure_gsutil_short(self):
+    version = '4.2'
+    gsutil_dir = os.path.join(self.tempdir, 'gsutil_%s' % version, 'gsutil')
+    gsutil_bin = os.path.join(gsutil_dir, 'gsutil')
+    os.makedirs(gsutil_dir)
+    with open(gsutil_bin, 'w') as f:
+      f.write('Foobar')
+
+    # The existing install passes its version check, so nothing is downloaded.
+    self.fake.add_expectation(
+        [sys.executable, gsutil_bin, 'version'], verbose=False, _returns=True)
+    self.assertEqual(gsutil.ensure_gsutil(version, self.tempdir), gsutil_bin)
+
+if __name__ == '__main__':
+  unittest.main()