Anthony Sottile | 8f61529 | 2022-01-15 19:24:05 -0500 | [diff] [blame] | 1 | from __future__ import annotations |
| 2 | |
Morgan Courbet | fc8a5b2 | 2017-06-13 21:38:14 +0200 | [diff] [blame] | 3 | import argparse |
Anthony Sottile | fbcd096 | 2017-09-05 20:20:43 -0700 | [diff] [blame] | 4 | import collections |
Anthony Sottile | 030bfac | 2019-01-31 19:19:10 -0800 | [diff] [blame] | 5 | from typing import Sequence |
Morgan Courbet | fc8a5b2 | 2017-06-13 21:38:14 +0200 | [diff] [blame] | 6 | |
| 7 | |
# Byte sequences of the three line terminators this tool recognises.
CR = b'\r'
LF = b'\n'
CRLF = CR + LF
# The order of this tuple is significant in two ways:
#   * detection walks it front to back and stops at the first suffix match,
#     so CRLF has to be tested before LF (a CRLF line also ends with LF);
#   * 'auto' mode breaks ties in favour of the later entry, which gives the
#     preference LF over CRLF over CR.
ALL_ENDINGS = (CR, CRLF, LF)
# Maps the explicit ``--fix`` choices to the terminator they request.
FIX_TO_LINE_ENDING = dict(cr=CR, crlf=CRLF, lf=LF)
Morgan Courbet | fc8a5b2 | 2017-06-13 21:38:14 +0200 | [diff] [blame] | 14 | |
| 15 | |
Anthony Sottile | f5c42a0 | 2020-02-05 11:10:42 -0800 | [diff] [blame] | 16 | def _fix(filename: str, contents: bytes, ending: bytes) -> None: |
Anthony Sottile | fbcd096 | 2017-09-05 20:20:43 -0700 | [diff] [blame] | 17 | new_contents = b''.join( |
| 18 | line.rstrip(b'\r\n') + ending for line in contents.splitlines(True) |
| 19 | ) |
| 20 | with open(filename, 'wb') as f: |
| 21 | f.write(new_contents) |
Morgan Courbet | fc8a5b2 | 2017-06-13 21:38:14 +0200 | [diff] [blame] | 22 | |
| 23 | |
def fix_filename(filename: str, fix: str) -> int:
    """Check one file for inconsistent line endings and optionally fix it.

    Args:
        filename: path of the file to inspect; read in binary mode.
        fix: ``'no'`` to only report, ``'auto'`` to convert a mixed file to
            its most common ending, or a key of ``FIX_TO_LINE_ENDING`` to
            force that ending.

    Returns:
        A truthy value when the file had mixed endings (``'no'``/``'auto'``)
        or contained endings other than the requested one (explicit mode);
        a falsy value otherwise.
    """
    with open(filename, 'rb') as f:
        contents = f.read()

    # Tally terminators per style.  ALL_ENDINGS is scanned in order and the
    # first suffix that matches wins; unterminated lines are not counted.
    counts: dict[bytes, int] = collections.defaultdict(int)
    for line in contents.splitlines(True):
        found = next((e for e in ALL_ENDINGS if line.endswith(e)), None)
        if found is not None:
            counts[found] += 1

    # More than one terminator style present in the file
    mixed = len([n for n in counts.values() if n]) > 1

    if fix == 'no':
        return mixed

    if fix == 'auto':
        if not mixed:
            return mixed
        # Most frequent ending wins.  Scanning ALL_ENDINGS back to front
        # makes ties resolve to the later entry, i.e. lf > crlf > cr.
        most_common = max(reversed(ALL_ENDINGS), key=lambda e: counts[e])
        _fix(filename, contents, most_common)
        return 1

    target_ending = FIX_TO_LINE_ENDING[fix]
    # Rewrite only if some line uses an ending other than the target; the
    # target itself may legitimately be absent from the tally.
    other_endings = any(
        n for seen, n in counts.items() if seen != target_ending
    )
    if other_endings:
        _fix(filename, contents, target_ending)
    return other_endings
Morgan Courbet | fc8a5b2 | 2017-06-13 21:38:14 +0200 | [diff] [blame] | 63 | |
| 64 | |
def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point.

    Parses ``-f/--fix`` and the list of filenames from *argv* (argparse
    falls back to ``sys.argv`` when it is ``None``), runs ``fix_filename``
    on each file and prints one line per file that was flagged.

    Returns:
        1 if any file was flagged, otherwise 0.
    """
    fix_choices = ('auto', 'no', *FIX_TO_LINE_ENDING)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f', '--fix',
        choices=fix_choices,
        default='auto',
        help='Replace line ending with the specified. Default is "auto"',
    )
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    args = parser.parse_args(argv)

    report_only = args.fix == 'no'
    exit_code = 0
    for filename in args.filenames:
        if not fix_filename(filename, args.fix):
            continue
        if report_only:
            print(f'{filename}: mixed line endings')
        else:
            print(f'{filename}: fixed mixed line endings')
        exit_code = 1
    return exit_code
Morgan Courbet | fc8a5b2 | 2017-06-13 21:38:14 +0200 | [diff] [blame] | 85 | |
| 86 | |
# When run as a script, exit with the status code returned by main().
if __name__ == '__main__':
    raise SystemExit(main())