blob: 60c71eeb84dc997298ce2c0bce4cf800afc26275 [file] [log] [blame]
Anthony Sottile8f615292022-01-15 19:24:05 -05001from __future__ import annotations
2
Anthony Sottileaa2ba6f2015-11-13 12:34:37 -08003import argparse
Anthony Sottile030bfac2019-01-31 19:19:10 -08004from typing import IO
Anthony Sottilef5c42a02020-02-05 11:10:42 -08005from typing import NamedTuple
Anthony Sottile030bfac2019-01-31 19:19:10 -08006from typing import Sequence
Anthony Sottileaa2ba6f2015-11-13 12:34:37 -08007
# Pragma line (without line ending) used when the caller does not supply one.
DEFAULT_PRAGMA = b'# -*- coding: utf-8 -*-'
Anthony Sottileaa2ba6f2015-11-13 12:34:37 -08009
10
def has_coding(line: bytes) -> bool:
    """Heuristically decide whether ``line`` looks like an encoding pragma.

    A line qualifies when its first non-whitespace byte is ``#`` and it
    contains at least one of the markers ``unicode``, ``encoding``,
    ``coding:`` or ``coding=`` anywhere in the line.

    Args:
        line: A single raw line of the file (line ending may be included).

    Returns:
        True if the line looks like some kind of coding pragma, else False.
    """
    if not line.lstrip().startswith(b'#'):
        # Blank / whitespace-only lines and non-comment lines never qualify.
        return False
    markers = (b'unicode', b'encoding', b'coding:', b'coding=')
    return any(marker in line for marker in markers)
22
23
class ExpectedContents(NamedTuple):
    """A source file split into its header pieces and remaining body."""

    # The shebang line as read from the file, or b'' when there is none.
    shebang: bytes
    # The bytes that should follow the shebang / pragma header.
    rest: bytes
    # Tri-state describing the pragma found in the file:
    #   True  -> exactly the expected coding pragma is present
    #   False -> no coding pragma at all
    #   None  -> a coding pragma is present, but it does not match
    pragma_status: bool | None
    # Line terminator to append after a newly written pragma.
    ending: bytes

    @property
    def has_any_pragma(self) -> bool:
        """Whether a coding pragma (matching or not) was found."""
        return not (self.pragma_status is False)

    def is_expected_pragma(self, remove: bool) -> bool:
        """Report whether the file is already in the desired state.

        Args:
            remove: True when the pragma should be absent, False when the
                exact expected pragma should be present.

        Returns:
            True only if ``pragma_status`` is ``False`` (for removal) or
            ``True`` (for insertion); a mismatching pragma (``None``) is
            never considered expected.
        """
        if remove:
            return self.pragma_status is False
        return self.pragma_status is True
Anthony Sottileaa2ba6f2015-11-13 12:34:37 -080040
Anthony Sottileaa2ba6f2015-11-13 12:34:37 -080041
def _get_expected_contents(
    first_line: bytes,
    second_line: bytes,
    rest: bytes,
    expected_pragma: bytes,
) -> ExpectedContents:
    """Split the start of a file into shebang, pragma status and body.

    Args:
        first_line: First line of the file, line ending included.
        second_line: Second line of the file, line ending included.
        rest: Everything after the second line.
        expected_pragma: The pragma the file should carry, without line ending.

    Returns:
        An ``ExpectedContents`` whose ``rest`` holds every line that is
        neither the shebang nor a recognised coding pragma.
    """
    starts_with_shebang = first_line.startswith(b'#!')
    if starts_with_shebang:
        # The pragma, if any, may only sit directly below the shebang.
        candidate = second_line
    else:
        candidate = first_line
        rest = second_line + rest

    pragma_status: bool | None
    if candidate.rstrip(b'\r\n') == expected_pragma:
        pragma_status = True
    elif has_coding(candidate):
        pragma_status = None
    else:
        # Not a pragma at all: the candidate line is ordinary content.
        pragma_status = False
        rest = candidate + rest

    return ExpectedContents(
        shebang=first_line if starts_with_shebang else b'',
        rest=rest,
        pragma_status=pragma_status,
        # Mirror the first line's terminator; default to LF otherwise.
        ending=b'\r\n' if first_line.endswith(b'\r\n') else b'\n',
    )
69
70
def fix_encoding_pragma(
    f: IO[bytes],
    remove: bool = False,
    expected_pragma: bytes = DEFAULT_PRAGMA,
) -> int:
    """Rewrite ``f`` in place so its encoding pragma is in the desired state.

    The first two lines and the remainder are read from the file's current
    position; when a change is needed the file is truncated and rewritten.

    Args:
        f: Binary file object supporting read, seek, truncate and write.
        remove: When True the pragma is stripped; otherwise
            ``expected_pragma`` is ensured to be present.
        expected_pragma: Pragma line to enforce, without line ending.

    Returns:
        1 if the file was modified, 0 if it was left untouched.
    """
    expected = _get_expected_contents(
        f.readline(), f.readline(), f.read(), expected_pragma,
    )

    # Special case: nothing but whitespace besides the header lines.
    if not expected.rest.strip():
        if not (expected.has_any_pragma or expected.shebang):
            return 0
        # A file holding only a shebang and/or a coding pragma is emptied;
        # truncating at offset 0 is sufficient, no write is required.
        f.seek(0)
        f.truncate()
        return 1

    if expected.is_expected_pragma(remove):
        return 0

    # Otherwise, write out the new file: shebang, optional pragma, body.
    f.seek(0)
    f.truncate()
    f.write(expected.shebang)
    if not remove:
        f.write(expected_pragma + expected.ending)
    f.write(expected.rest)

    return 1
103
104
Anthony Sottilef5c42a02020-02-05 11:10:42 -0800105def _normalize_pragma(pragma: str) -> bytes:
106 return pragma.encode().rstrip()
Anthony Sottile693709e2016-08-11 22:56:54 -0700107
108
def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point: fix the encoding pragma of each given file.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        0 if no file needed changes, 1 if at least one file was rewritten.
    """
    # The text is the parser's description. Passing it positionally would
    # set ``prog`` and corrupt the "usage:" line of --help output.
    parser = argparse.ArgumentParser(
        description='Fixes the encoding pragma of python files',
    )
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    parser.add_argument(
        '--pragma', default=DEFAULT_PRAGMA, type=_normalize_pragma,
        help=(
            f'The encoding pragma to use.  '
            f'Default: {DEFAULT_PRAGMA.decode()}'
        ),
    )
    parser.add_argument(
        '--remove', action='store_true',
        help='Remove the encoding pragma (Useful in a python3-only codebase)',
    )
    args = parser.parse_args(argv)

    retv = 0

    if args.remove:
        fmt = 'Removed encoding pragma from {filename}'
    else:
        fmt = 'Added `{pragma}` to {filename}'

    for filename in args.filenames:
        # 'r+b' allows reading the current contents and rewriting in place.
        with open(filename, 'r+b') as f:
            file_ret = fix_encoding_pragma(
                f, remove=args.remove, expected_pragma=args.pragma,
            )
            retv |= file_ret
            if file_ret:
                print(
                    fmt.format(pragma=args.pragma.decode(), filename=filename),
                )

    return retv
146
Anthony Sottile70e405e2016-11-30 09:56:42 -0800147
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())