Add check-added-large-files hook
diff --git a/README.md b/README.md
index 98e3ac7..8d17026 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@
### Hooks available
- `autopep8-wrapper` - Runs autopep8 over python source.
+- `check-added-large-files` - Prevent giant files from being committed.
- `check-json` - Attempts to load all json files to verify syntax.
- `check-yaml` - Attempts to load all yaml files to verify syntax.
- `debug-statements` - Check for pdb / ipdb / pudb statements in code.
diff --git a/hooks.yaml b/hooks.yaml
index 5ef125c..3eb0e94 100644
--- a/hooks.yaml
+++ b/hooks.yaml
@@ -5,6 +5,13 @@
language: python
files: \.py$
args: [-i]
+- id: check-added-large-files
+ name: Check for added large files
+ language: python
+ entry: check-added-large-files
+ description: Prevent giant files from being committed
+ # Match all files
+ files: ''
- id: check-json
name: Check JSON
description: This hook checks json files for parseable syntax.
diff --git a/pre_commit_hooks/check_added_large_files.py b/pre_commit_hooks/check_added_large_files.py
new file mode 100644
index 0000000..999e9c1
--- /dev/null
+++ b/pre_commit_hooks/check_added_large_files.py
@@ -0,0 +1,49 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+import math
+import os
+import sys
+
+from plumbum import local
+
+
+def find_large_added_files(filenames, maxkb):
+    """Report staged, newly added files among `filenames` larger than `maxkb`.
+
+    Sizes are rounded up to whole KB. Returns 1 if any file is too big, else 0.
+    """
+    # `-z` gives NUL-separated, unquoted paths, so unusual names still match.
+    git_args = ('diff', '--staged', '--name-only', '--diff-filter', 'A', '-z')
+    added = set(local['git'](*git_args).split('\0'))
+    retv = 0
+    for filename in added & set(filenames):
+        kb = int(math.ceil(os.stat(filename).st_size / 1024))
+        if kb > maxkb:
+            print('{0} ({1} KB) exceeds {2} KB.'.format(filename, kb, maxkb))
+            retv = 1
+    return retv
+
+
+def main(argv=None):
+    """Run the hook on `argv` (default: sys.argv[1:]); return its exit code."""
+    argv = argv if argv is not None else sys.argv[1:]
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'filenames', nargs='*',
+        help='Filenames pre-commit believes are changed.'
+    )
+    parser.add_argument(
+        '--maxkb', type=int, default=500,
+        help='Maximum allowable KB for added files',
+    )
+
+    args = parser.parse_args(argv)
+    return find_large_added_files(args.filenames, args.maxkb)
+
+
+if __name__ == '__main__':
+    sys.exit(main())  # sys.exit, not the site-injected `exit` builtin
diff --git a/setup.py b/setup.py
index 8905945..3681e65 100644
--- a/setup.py
+++ b/setup.py
@@ -38,6 +38,7 @@
'console_scripts': [
'autopep8-wrapper = pre_commit_hooks.autopep8_wrapper:main',
'check-json = pre_commit_hooks.check_json:check_json',
+ 'check-added-large-files = pre_commit_hooks.check_added_large_files:main',
'check-yaml = pre_commit_hooks.check_yaml:check_yaml',
'debug-statement-hook = pre_commit_hooks.debug_statement_hook:debug_statement_hook',
'end-of-file-fixer = pre_commit_hooks.end_of_file_fixer:end_of_file_fixer',
diff --git a/tests/check_added_large_files_test.py b/tests/check_added_large_files_test.py
new file mode 100644
index 0000000..b0ae4cd
--- /dev/null
+++ b/tests/check_added_large_files_test.py
@@ -0,0 +1,89 @@
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+import io
+
+import pytest
+from plumbum import local
+
+from pre_commit_hooks.check_added_large_files import find_large_added_files
+from pre_commit_hooks.check_added_large_files import main
+
+
+@pytest.fixture
+def temp_git_dir(tmpdir):
+    """Return the path of a freshly initialised git repo under `tmpdir`."""
+    git_dir = tmpdir.join('gits').strpath
+    local['git']('init', git_dir)
+    # No teardown is needed, so a plain returning fixture is enough.
+    return git_dir
+
+
+def write_file(filename, contents):
+    """Write text `contents` to `filename`.
+
+    Hax because coveragepy chokes on nested context managers.
+    """
+    with io.open(filename, 'w') as file_obj:
+        file_obj.write(contents)
+
+
+def test_nothing_added(temp_git_dir):
+    """Nothing staged: passes even with a 0 KB limit."""
+    with local.cwd(temp_git_dir):
+        assert find_large_added_files(['f.py'], 0) == 0
+
+
+def test_adding_something(temp_git_dir):
+    """A small staged file still trips a 0 KB limit."""
+    with local.cwd(temp_git_dir):
+        write_file('f.py', "print('hello world')")
+        local['git']('add', 'f.py')
+
+        # Should fail with max size of 0
+        assert find_large_added_files(['f.py'], 0) == 1
+
+
+def test_add_something_giant(temp_git_dir):
+    """A staged 10000-byte file fails a 9 KB limit but passes a 10 KB one."""
+    with local.cwd(temp_git_dir):
+        write_file('f.py', 'a' * 10000)
+
+        # Should not fail when not added
+        assert find_large_added_files(['f.py'], 0) == 0
+
+        local['git']('add', 'f.py')
+
+        # Should fail with strict bound
+        assert find_large_added_files(['f.py'], 0) == 1
+
+        # Should also fail with actual bound
+        assert find_large_added_files(['f.py'], 9) == 1
+
+        # Should pass with higher bound
+        assert find_large_added_files(['f.py'], 10) == 0
+
+
+def test_added_file_not_in_pre_commits_list(temp_git_dir):
+    """Staged files that were not passed in `filenames` are ignored."""
+    with local.cwd(temp_git_dir):
+        write_file('f.py', "print('hello world')")
+        local['git']('add', 'f.py')
+
+        # Should pass even with a size of 0
+        assert find_large_added_files(['g.py'], 0) == 0
+
+
+def test_integration(temp_git_dir):
+    """Exercise `main`: the default limit passes, `--maxkb 9` fails."""
+    with local.cwd(temp_git_dir):
+        assert main(argv=[]) == 0
+
+        write_file('f.py', 'a' * 10000)
+        local['git']('add', 'f.py')
+
+        # Should not fail with default
+        assert main(argv=['f.py']) == 0
+
+        # Should fail with --maxkb
+        assert main(argv=['--maxkb', '9', 'f.py']) == 1