blob: 9e0619b094b20c89386e094bbd0c14170b462c51 [file] [log] [blame]
Anthony Sottile8f615292022-01-15 19:24:05 -05001from __future__ import annotations
2
Anthony Sottilee0a6e662014-12-31 12:21:21 -08003import argparse
Anthony Sottilee0a6e662014-12-31 12:21:21 -08004import math
5import os
Alex Martani03a65ca2021-10-21 15:29:54 -07006import subprocess
Anthony Sottile030bfac2019-01-31 19:19:10 -08007from typing import Sequence
Anthony Sottilee0a6e662014-12-31 12:21:21 -08008
gkiselc682b502015-01-07 14:07:32 -08009from pre_commit_hooks.util import added_files
Alex Martani03a65ca2021-10-21 15:29:54 -070010from pre_commit_hooks.util import zsplit
Anthony Sottile3f6f23d2015-12-25 09:25:14 -080011
12
Anthony Sottile8f615292022-01-15 19:24:05 -050013def filter_lfs_files(filenames: set[str]) -> None: # pragma: no cover (lfs)
Alex Martani03a65ca2021-10-21 15:29:54 -070014 """Remove files tracked by git-lfs from the set."""
15 if not filenames:
16 return
Anthony Sottile3f6f23d2015-12-25 09:25:14 -080017
Alex Martani03a65ca2021-10-21 15:29:54 -070018 check_attr = subprocess.run(
19 ('git', 'check-attr', 'filter', '-z', '--stdin'),
20 stdout=subprocess.PIPE,
21 stderr=subprocess.DEVNULL,
22 encoding='utf-8',
23 check=True,
24 input='\0'.join(filenames),
25 )
26 stdout = zsplit(check_attr.stdout)
27 for i in range(0, len(stdout), 3):
28 filename, filter_tag = stdout[i], stdout[i + 2]
29 if filter_tag == 'lfs':
30 filenames.remove(filename)
Anthony Sottilee0a6e662014-12-31 12:21:21 -080031
32
Marcus Shawcroft012bb062020-09-16 06:26:11 +010033def find_large_added_files(
34 filenames: Sequence[str],
35 maxkb: int,
36 *,
37 enforce_all: bool = False,
38) -> int:
Anthony Sottilee0a6e662014-12-31 12:21:21 -080039 # Find all added files that are also in the list of files pre-commit tells
40 # us about
Anthony Sottilee0a6e662014-12-31 12:21:21 -080041 retv = 0
Alex Martani03a65ca2021-10-21 15:29:54 -070042 filenames_filtered = set(filenames)
43 filter_lfs_files(filenames_filtered)
44
Marcus Shawcroft012bb062020-09-16 06:26:11 +010045 if not enforce_all:
46 filenames_filtered &= added_files()
47
48 for filename in filenames_filtered:
Miroslav Šedivý2e4efef2023-09-21 19:54:38 +020049 kb = math.ceil(os.stat(filename).st_size / 1024)
Anthony Sottilee0a6e662014-12-31 12:21:21 -080050 if kb > maxkb:
Anthony Sottilef5c42a02020-02-05 11:10:42 -080051 print(f'{filename} ({kb} KB) exceeds {maxkb} KB.')
Anthony Sottilee0a6e662014-12-31 12:21:21 -080052 retv = 1
53
54 return retv
55
56
Anthony Sottile8f615292022-01-15 19:24:05 -050057def main(argv: Sequence[str] | None = None) -> int:
Anthony Sottilee0a6e662014-12-31 12:21:21 -080058 parser = argparse.ArgumentParser()
59 parser.add_argument(
60 'filenames', nargs='*',
Anthony Sottile2a902e02017-07-12 18:35:24 -070061 help='Filenames pre-commit believes are changed.',
Anthony Sottilee0a6e662014-12-31 12:21:21 -080062 )
63 parser.add_argument(
Marcus Shawcroft012bb062020-09-16 06:26:11 +010064 '--enforce-all', action='store_true',
65 help='Enforce all files are checked, not just staged files.',
66 )
67 parser.add_argument(
Anthony Sottilee0a6e662014-12-31 12:21:21 -080068 '--maxkb', type=int, default=500,
Janosh Riebesell05a52972021-10-23 12:11:03 +010069 help='Maximum allowable KB for added files',
Anthony Sottilee0a6e662014-12-31 12:21:21 -080070 )
Anthony Sottilee0a6e662014-12-31 12:21:21 -080071 args = parser.parse_args(argv)
Marcus Shawcroft012bb062020-09-16 06:26:11 +010072
73 return find_large_added_files(
74 args.filenames,
75 args.maxkb,
76 enforce_all=args.enforce_all,
77 )
Anthony Sottilee0a6e662014-12-31 12:21:21 -080078
79
80if __name__ == '__main__':
Anthony Sottile39ab2ed2021-10-23 13:23:50 -040081 raise SystemExit(main())