| from __future__ import absolute_import |
| from __future__ import print_function |
| from __future__ import unicode_literals |
| |
| import argparse |
| import collections |
| from typing import IO |
| from typing import Optional |
| from typing import Sequence |
| from typing import Union |
| |
| DEFAULT_PRAGMA = b'# -*- coding: utf-8 -*-\n' |
| |
| |
| def has_coding(line): # type: (bytes) -> bool |
| if not line.strip(): |
| return False |
| return ( |
| line.lstrip()[0:1] == b'#' and ( |
| b'unicode' in line or |
| b'encoding' in line or |
| b'coding:' in line or |
| b'coding=' in line |
| ) |
| ) |
| |
| |
| class ExpectedContents(collections.namedtuple( |
| 'ExpectedContents', ('shebang', 'rest', 'pragma_status'), |
| )): |
| """ |
| pragma_status: |
| - True: has exactly the coding pragma expected |
| - False: missing coding pragma entirely |
| - None: has a coding pragma, but it does not match |
| """ |
| __slots__ = () |
| |
| @property |
| def has_any_pragma(self): # type: () -> bool |
| return self.pragma_status is not False |
| |
| def is_expected_pragma(self, remove): # type: (bool) -> bool |
| expected_pragma_status = not remove |
| return self.pragma_status is expected_pragma_status |
| |
| |
| def _get_expected_contents(first_line, second_line, rest, expected_pragma): |
| # type: (bytes, bytes, bytes, bytes) -> ExpectedContents |
| if first_line.startswith(b'#!'): |
| shebang = first_line |
| potential_coding = second_line |
| else: |
| shebang = b'' |
| potential_coding = first_line |
| rest = second_line + rest |
| |
| if potential_coding == expected_pragma: |
| pragma_status = True # type: Optional[bool] |
| elif has_coding(potential_coding): |
| pragma_status = None |
| else: |
| pragma_status = False |
| rest = potential_coding + rest |
| |
| return ExpectedContents( |
| shebang=shebang, rest=rest, pragma_status=pragma_status, |
| ) |
| |
| |
| def fix_encoding_pragma(f, remove=False, expected_pragma=DEFAULT_PRAGMA): |
| # type: (IO[bytes], bool, bytes) -> int |
| expected = _get_expected_contents( |
| f.readline(), f.readline(), f.read(), expected_pragma, |
| ) |
| |
| # Special cases for empty files |
| if not expected.rest.strip(): |
| # If a file only has a shebang or a coding pragma, remove it |
| if expected.has_any_pragma or expected.shebang: |
| f.seek(0) |
| f.truncate() |
| f.write(b'') |
| return 1 |
| else: |
| return 0 |
| |
| if expected.is_expected_pragma(remove): |
| return 0 |
| |
| # Otherwise, write out the new file |
| f.seek(0) |
| f.truncate() |
| f.write(expected.shebang) |
| if not remove: |
| f.write(expected_pragma) |
| f.write(expected.rest) |
| |
| return 1 |
| |
| |
| def _normalize_pragma(pragma): # type: (Union[bytes, str]) -> bytes |
| if not isinstance(pragma, bytes): |
| pragma = pragma.encode('UTF-8') |
| return pragma.rstrip() + b'\n' |
| |
| |
| def _to_disp(pragma): # type: (bytes) -> str |
| return pragma.decode().rstrip() |
| |
| |
| def main(argv=None): # type: (Optional[Sequence[str]]) -> int |
| parser = argparse.ArgumentParser( |
| 'Fixes the encoding pragma of python files', |
| ) |
| parser.add_argument('filenames', nargs='*', help='Filenames to fix') |
| parser.add_argument( |
| '--pragma', default=DEFAULT_PRAGMA, type=_normalize_pragma, |
| help='The encoding pragma to use. Default: {}'.format( |
| _to_disp(DEFAULT_PRAGMA), |
| ), |
| ) |
| parser.add_argument( |
| '--remove', action='store_true', |
| help='Remove the encoding pragma (Useful in a python3-only codebase)', |
| ) |
| args = parser.parse_args(argv) |
| |
| retv = 0 |
| |
| if args.remove: |
| fmt = 'Removed encoding pragma from {filename}' |
| else: |
| fmt = 'Added `{pragma}` to {filename}' |
| |
| for filename in args.filenames: |
| with open(filename, 'r+b') as f: |
| file_ret = fix_encoding_pragma( |
| f, remove=args.remove, expected_pragma=args.pragma, |
| ) |
| retv |= file_ret |
| if file_ret: |
| print(fmt.format( |
| pragma=_to_disp(args.pragma), filename=filename, |
| )) |
| |
| return retv |
| |
| |
| if __name__ == "__main__": |
| exit(main()) |