Fix #518: provide --enforce-all option to check_added_large_files. When provided, the --enforce-all option ensures that all files passed on the command line are checked against the size limit, not just those staged for addition. Default behaviour remains unchanged.
diff --git a/README.md b/README.md index 3552721..a6b62ab 100644 --- a/README.md +++ b/README.md
@@ -26,8 +26,11 @@ #### `check-added-large-files` Prevent giant files from being committed. - Specify what is "too large" with `args: ['--maxkb=123']` (default=500kB). + - Limits checked files to those indicated as staged for addition by git. - If `git-lfs` is installed, lfs files will be skipped (requires `git-lfs>=2.2.1`) + - `--enforce-all` - Check all listed files, not just those staged for + addition. #### `check-ast` Simply check whether files parse as valid python.
diff --git a/pre_commit_hooks/check_added_large_files.py b/pre_commit_hooks/check_added_large_files.py index 91f5754..cb646d7 100644 --- a/pre_commit_hooks/check_added_large_files.py +++ b/pre_commit_hooks/check_added_large_files.py
@@ -21,11 +21,20 @@ return set(json.loads(lfs_ret)['files']) -def find_large_added_files(filenames: Sequence[str], maxkb: int) -> int: +def find_large_added_files( + filenames: Sequence[str], + maxkb: int, + *, + enforce_all: bool = False, +) -> int: # Find all added files that are also in the list of files pre-commit tells # us about retv = 0 - for filename in (added_files() & set(filenames)) - lfs_files(): + filenames_filtered = set(filenames) - lfs_files() + if not enforce_all: + filenames_filtered &= added_files() + + for filename in filenames_filtered: kb = int(math.ceil(os.stat(filename).st_size / 1024)) if kb > maxkb: print(f'{filename} ({kb} KB) exceeds {maxkb} KB.') @@ -41,12 +50,20 @@ help='Filenames pre-commit believes are changed.', ) parser.add_argument( + '--enforce-all', action='store_true', + help='Enforce all files are checked, not just staged files.', + ) + parser.add_argument( '--maxkb', type=int, default=500, help='Maxmimum allowable KB for added files', ) - args = parser.parse_args(argv) - return find_large_added_files(args.filenames, args.maxkb) + + return find_large_added_files( + args.filenames, + args.maxkb, + enforce_all=args.enforce_all, + ) if __name__ == '__main__':
diff --git a/tests/check_added_large_files_test.py b/tests/check_added_large_files_test.py index 40ffd24..ff53b05 100644 --- a/tests/check_added_large_files_test.py +++ b/tests/check_added_large_files_test.py
@@ -40,6 +40,17 @@ assert find_large_added_files(['f.py'], 10) == 0 +def test_enforce_all(temp_git_dir): + with temp_git_dir.as_cwd(): + temp_git_dir.join('f.py').write('a' * 10000) + + # Should fail, when not staged with enforce_all + assert find_large_added_files(['f.py'], 0, enforce_all=True) == 1 + + # Should pass, when not staged without enforce_all + assert find_large_added_files(['f.py'], 0, enforce_all=False) == 0 + + def test_added_file_not_in_pre_commits_list(temp_git_dir): with temp_git_dir.as_cwd(): temp_git_dir.join('f.py').write("print('hello world')") @@ -97,3 +108,15 @@ # Now move it and make sure the hook still succeeds cmd_output('git', 'mv', 'a.bin', 'b.bin') assert main(('--maxkb', '9', 'b.bin')) == 0 + + +@xfailif_no_gitlfs +def test_enforce_allows_gitlfs(temp_git_dir, monkeypatch): # pragma: no cover + with temp_git_dir.as_cwd(): + monkeypatch.setenv('HOME', str(temp_git_dir)) + cmd_output('git', 'lfs', 'install') + temp_git_dir.join('f.py').write('a' * 10000) + cmd_output('git', 'lfs', 'track', 'f.py') + cmd_output('git', 'add', '--', '.') + # With --enforce-all large files on git lfs should succeed + assert main(('--enforce-all', '--maxkb', '9', 'f.py')) == 0