Anthony Sottile | 8f61529 | 2022-01-15 19:24:05 -0500 | [diff] [blame] | 1 | from __future__ import annotations |
| 2 | |
Anthony Sottile | aa2ba6f | 2015-11-13 12:34:37 -0800 | [diff] [blame] | 3 | import argparse |
Anthony Sottile | 030bfac | 2019-01-31 19:19:10 -0800 | [diff] [blame] | 4 | from typing import IO |
Anthony Sottile | f5c42a0 | 2020-02-05 11:10:42 -0800 | [diff] [blame] | 5 | from typing import NamedTuple |
Anthony Sottile | 030bfac | 2019-01-31 19:19:10 -0800 | [diff] [blame] | 6 | from typing import Sequence |
Anthony Sottile | aa2ba6f | 2015-11-13 12:34:37 -0800 | [diff] [blame] | 7 | |
# Encoding pragma used when the caller does not supply one via `--pragma`.
DEFAULT_PRAGMA = b'# -*- coding: utf-8 -*-'
Anthony Sottile | aa2ba6f | 2015-11-13 12:34:37 -0800 | [diff] [blame] | 9 | |
| 10 | |
def has_coding(line: bytes) -> bool:
    """Heuristically decide whether ``line`` looks like an encoding pragma.

    A line qualifies when its first non-whitespace byte is ``#`` and it
    contains any of the markers ``unicode``, ``encoding``, ``coding:`` or
    ``coding=``.  Empty and whitespace-only lines never qualify.
    """
    # An empty / blank line has no leading `#`, so it is rejected here too.
    if not line.lstrip().startswith(b'#'):
        return False
    markers = (b'unicode', b'encoding', b'coding:', b'coding=')
    return any(marker in line for marker in markers)
| 22 | |
| 23 | |
class ExpectedContents(NamedTuple):
    """The head of a file split into shebang, pragma state and remainder."""

    # The `#!` line (including its line ending), or b'' when there is none.
    shebang: bytes
    # Everything in the file that is neither the shebang nor the pragma.
    rest: bytes
    # Tri-state describing the coding pragma that was found:
    #   True:  has exactly the coding pragma expected
    #   False: missing coding pragma entirely
    #   None:  has a coding pragma, but it does not match
    pragma_status: bool | None
    # Line ending to use when a pragma line is written.
    ending: bytes

    @property
    def has_any_pragma(self) -> bool:
        """Whether some coding pragma (matching or not) is present."""
        if self.pragma_status is False:
            return False
        return True

    def is_expected_pragma(self, remove: bool) -> bool:
        """Whether the file is already in the requested state.

        With ``remove`` false the pragma must match exactly; with ``remove``
        true the pragma must be entirely absent.  A non-matching pragma
        (``None``) is never the expected state.
        """
        wanted_status = False if remove else True
        return self.pragma_status is wanted_status
Anthony Sottile | aa2ba6f | 2015-11-13 12:34:37 -0800 | [diff] [blame] | 40 | |
Anthony Sottile | aa2ba6f | 2015-11-13 12:34:37 -0800 | [diff] [blame] | 41 | |
def _get_expected_contents(
        first_line: bytes,
        second_line: bytes,
        rest: bytes,
        expected_pragma: bytes,
) -> ExpectedContents:
    """Split the head of a file into shebang, pragma status and remainder.

    ``first_line`` and ``second_line`` are the first two lines of the file
    (line endings included) and ``rest`` is everything after them.  The
    returned ``rest`` holds every byte that is neither the shebang nor a
    coding pragma.
    """
    # A shebang pushes the candidate pragma down to the second line.
    if first_line.startswith(b'#!'):
        shebang, candidate, remainder = first_line, second_line, rest
    else:
        shebang, candidate, remainder = b'', first_line, second_line + rest

    status: bool | None
    if candidate.rstrip(b'\r\n') == expected_pragma:
        status = True
    elif has_coding(candidate):
        status = None
    else:
        status = False
        # Not a pragma at all: the candidate line is ordinary content.
        remainder = candidate + remainder

    # The line ending is taken from the first line only.
    line_ending = b'\r\n' if first_line.endswith(b'\r\n') else b'\n'

    return ExpectedContents(
        shebang=shebang,
        rest=remainder,
        pragma_status=status,
        ending=line_ending,
    )
| 69 | |
| 70 | |
def fix_encoding_pragma(
        f: IO[bytes],
        remove: bool = False,
        expected_pragma: bytes = DEFAULT_PRAGMA,
) -> int:
    """Rewrite ``f`` in place so its encoding pragma matches the request.

    Args:
        f: binary file object opened for reading and writing, positioned at
            the start of the file.
        remove: when true the pragma is stripped; otherwise
            ``expected_pragma`` is ensured to be present.
        expected_pragma: the exact pragma line (without line ending) to
            look for and to write.

    Returns:
        1 if the file was modified, 0 if it was left untouched.
    """
    expected = _get_expected_contents(
        f.readline(), f.readline(), f.read(), expected_pragma,
    )

    # Special case: nothing but whitespace besides the shebang / pragma.
    if not expected.rest.strip():
        if not (expected.has_any_pragma or expected.shebang):
            return 0
        # A file holding only a shebang and/or a coding pragma is emptied;
        # truncating at offset 0 is sufficient, nothing needs to be written.
        f.seek(0)
        f.truncate()
        return 1

    if expected.is_expected_pragma(remove):
        return 0

    # Otherwise, write out the new file: shebang, optional pragma, the rest.
    parts = [expected.shebang]
    if not remove:
        parts.append(expected_pragma + expected.ending)
    parts.append(expected.rest)

    f.seek(0)
    f.truncate()
    f.write(b''.join(parts))

    return 1
| 103 | |
| 104 | |
Anthony Sottile | f5c42a0 | 2020-02-05 11:10:42 -0800 | [diff] [blame] | 105 | def _normalize_pragma(pragma: str) -> bytes: |
| 106 | return pragma.encode().rstrip() |
Anthony Sottile | 693709e | 2016-08-11 22:56:54 -0700 | [diff] [blame] | 107 | |
| 108 | |
def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point: fix the encoding pragma of each file.

    Args:
        argv: argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        1 if at least one file was modified, 0 otherwise.
    """
    # Pass the text as `description`: the first positional parameter of
    # ArgumentParser is `prog`, which would put this sentence in the usage
    # line in place of the program name.
    parser = argparse.ArgumentParser(
        description='Fixes the encoding pragma of python files',
    )
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    parser.add_argument(
        '--pragma', default=DEFAULT_PRAGMA, type=_normalize_pragma,
        help=(
            f'The encoding pragma to use.  '
            f'Default: {DEFAULT_PRAGMA.decode()}'
        ),
    )
    parser.add_argument(
        '--remove', action='store_true',
        help='Remove the encoding pragma (Useful in a python3-only codebase)',
    )
    args = parser.parse_args(argv)

    retv = 0

    if args.remove:
        fmt = 'Removed encoding pragma from {filename}'
    else:
        fmt = 'Added `{pragma}` to {filename}'

    for filename in args.filenames:
        # 'r+b': the file is read and then rewritten in place, as bytes.
        with open(filename, 'r+b') as f:
            file_ret = fix_encoding_pragma(
                f, remove=args.remove, expected_pragma=args.pragma,
            )
        retv |= file_ret
        if file_ret:
            print(
                fmt.format(pragma=args.pragma.decode(), filename=filename),
            )

    return retv
| 146 | |
Anthony Sottile | 70e405e | 2016-11-30 09:56:42 -0800 | [diff] [blame] | 147 | |
# Script entry point: exit the process with main()'s return value.
if __name__ == '__main__':
    raise SystemExit(main())