Add a --remove option to fix-encoding-pragma
diff --git a/README.md b/README.md
index 091a8a4..6c3a3ec 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,8 @@
- `double-quote-string-fixer` - This hook replaces double quoted strings
with single quoted strings.
- `end-of-file-fixer` - Makes sure files end in a newline and only a newline.
-- `fix-encoding-pragma` - Add `# -*- coding: utf-8 -*-` to the top of python files
+- `fix-encoding-pragma` - Add `# -*- coding: utf-8 -*-` to the top of python files.
+ - To remove the coding pragma pass `--remove` (useful in a python3-only codebase)
- `flake8` - Run flake8 on your python files.
- `name-tests-test` - Assert that files in tests/ end in `_test.py`.
- Use `args: ['--django']` to match `test*.py` instead.
diff --git a/pre_commit_hooks/fix_encoding_pragma.py b/pre_commit_hooks/fix_encoding_pragma.py
index 48fc9c7..8586937 100644
--- a/pre_commit_hooks/fix_encoding_pragma.py
+++ b/pre_commit_hooks/fix_encoding_pragma.py
@@ -3,7 +3,7 @@
from __future__ import unicode_literals
import argparse
-import io
+import collections
expected_pragma = b'# -*- coding: utf-8 -*-\n'
@@ -21,34 +21,72 @@
)
-def fix_encoding_pragma(f):
- first_line = f.readline()
- second_line = f.readline()
- old = f.read()
- f.seek(0)
+class ExpectedContents(collections.namedtuple(
+ 'ExpectedContents', ('shebang', 'rest', 'pragma_status'),
+)):
+ """
+ pragma_status:
+ - True: has exactly the coding pragma expected
+ - False: missing coding pragma entirely
+ - None: has a coding pragma, but it does not match
+ """
+ __slots__ = ()
- # Ok case: the file is empty
- if not (first_line + second_line + old).strip():
- return 0
+ @property
+ def has_any_pragma(self):
+ return self.pragma_status is not False
- # Ok case: we specify pragma as the first line
- if first_line == expected_pragma:
- return 0
+ def is_expected_pragma(self, remove):
+ expected_pragma_status = not remove
+ return self.pragma_status is expected_pragma_status
- # OK case: we have a shebang as first line and pragma on second line
- if first_line.startswith(b'#!') and second_line == expected_pragma:
- return 0
- # Otherwise we need to rewrite stuff!
+def _get_expected_contents(first_line, second_line, rest):
if first_line.startswith(b'#!'):
- if has_coding(second_line):
- f.write(first_line + expected_pragma + old)
- else:
- f.write(first_line + expected_pragma + second_line + old)
- elif has_coding(first_line):
- f.write(expected_pragma + second_line + old)
+ shebang = first_line
+ potential_coding = second_line
else:
- f.write(expected_pragma + first_line + second_line + old)
+ shebang = b''
+ potential_coding = first_line
+ rest = second_line + rest
+
+ if potential_coding == expected_pragma:
+ pragma_status = True
+ elif has_coding(potential_coding):
+ pragma_status = None
+ else:
+ pragma_status = False
+ rest = potential_coding + rest
+
+ return ExpectedContents(
+ shebang=shebang, rest=rest, pragma_status=pragma_status,
+ )
+
+
+def fix_encoding_pragma(f, remove=False):
+ expected = _get_expected_contents(f.readline(), f.readline(), f.read())
+
+ # Special cases for empty files
+ if not expected.rest.strip():
+ # If a file only has a shebang or a coding pragma, remove it
+ if expected.has_any_pragma or expected.shebang:
+ f.seek(0)
+ f.truncate()
+ f.write(b'')
+ return 1
+ else:
+ return 0
+
+ if expected.is_expected_pragma(remove):
+ return 0
+
+ # Otherwise, write out the new file
+ f.seek(0)
+ f.truncate()
+ f.write(expected.shebang)
+ if not remove:
+ f.write(expected_pragma)
+ f.write(expected.rest)
return 1
@@ -56,18 +94,25 @@
def main(argv=None):
parser = argparse.ArgumentParser('Fixes the encoding pragma of python files')
parser.add_argument('filenames', nargs='*', help='Filenames to fix')
+ parser.add_argument(
+ '--remove', action='store_true',
+ help='Remove the encoding pragma (Useful in a python3-only codebase)',
+ )
args = parser.parse_args(argv)
retv = 0
+ if args.remove:
+ fmt = 'Removed encoding pragma from {filename}'
+ else:
+ fmt = 'Added `{pragma}` to {filename}'
+
for filename in args.filenames:
- with io.open(filename, 'r+b') as f:
- file_ret = fix_encoding_pragma(f)
+ with open(filename, 'r+b') as f:
+ file_ret = fix_encoding_pragma(f, remove=args.remove)
retv |= file_ret
if file_ret:
- print('Added `{0}` to {1}'.format(
- expected_pragma.strip(), filename,
- ))
+ print(fmt.format(pragma=expected_pragma, filename=filename))
return retv
diff --git a/tests/fix_encoding_pragma_test.py b/tests/fix_encoding_pragma_test.py
index e000a33..a9502a2 100644
--- a/tests/fix_encoding_pragma_test.py
+++ b/tests/fix_encoding_pragma_test.py
@@ -10,32 +10,46 @@
def test_integration_inserting_pragma(tmpdir):
- file_path = tmpdir.join('foo.py').strpath
+ path = tmpdir.join('foo.py')
+ path.write_binary(b'import httplib\n')
- with open(file_path, 'wb') as file_obj:
- file_obj.write(b'import httplib\n')
+ assert main((path.strpath,)) == 1
- assert main([file_path]) == 1
-
- with open(file_path, 'rb') as file_obj:
- assert file_obj.read() == (
- b'# -*- coding: utf-8 -*-\n'
- b'import httplib\n'
- )
+ assert path.read_binary() == (
+ b'# -*- coding: utf-8 -*-\n'
+ b'import httplib\n'
+ )
def test_integration_ok(tmpdir):
- file_path = tmpdir.join('foo.py').strpath
- with open(file_path, 'wb') as file_obj:
- file_obj.write(b'# -*- coding: utf-8 -*-\nx = 1\n')
- assert main([file_path]) == 0
+ path = tmpdir.join('foo.py')
+ path.write_binary(b'# -*- coding: utf-8 -*-\nx = 1\n')
+ assert main((path.strpath,)) == 0
+
+
+def test_integration_remove(tmpdir):
+ path = tmpdir.join('foo.py')
+ path.write_binary(b'# -*- coding: utf-8 -*-\nx = 1\n')
+
+ assert main((path.strpath, '--remove')) == 1
+
+ assert path.read_binary() == b'x = 1\n'
+
+
+def test_integration_remove_ok(tmpdir):
+ path = tmpdir.join('foo.py')
+ path.write_binary(b'x = 1\n')
+ assert main((path.strpath, '--remove')) == 0
@pytest.mark.parametrize(
'input_str',
(
b'',
- b'# -*- coding: utf-8 -*-\n',
+ (
+ b'# -*- coding: utf-8 -*-\n'
+ b'x = 1\n'
+ ),
(
b'#!/usr/bin/env python\n'
b'# -*- coding: utf-8 -*-\n'
@@ -59,20 +73,32 @@
b'import httplib\n',
),
(
- b'#!/usr/bin/env python\n',
+ b'#!/usr/bin/env python\n'
+ b'x = 1\n',
b'#!/usr/bin/env python\n'
b'# -*- coding: utf-8 -*-\n'
+ b'x = 1\n',
),
(
- b'#coding=utf-8\n',
+ b'#coding=utf-8\n'
+ b'x = 1\n',
b'# -*- coding: utf-8 -*-\n'
+ b'x = 1\n',
),
(
b'#!/usr/bin/env python\n'
- b'#coding=utf8\n',
+ b'#coding=utf8\n'
+ b'x = 1\n',
b'#!/usr/bin/env python\n'
- b'# -*- coding: utf-8 -*-\n',
+ b'# -*- coding: utf-8 -*-\n'
+ b'x = 1\n',
),
+ # These should each get truncated
+ (b'#coding: utf-8\n', b''),
+ (b'# -*- coding: utf-8 -*-\n', b''),
+ (b'#!/usr/bin/env python\n', b''),
+ (b'#!/usr/bin/env python\n#coding: utf8\n', b''),
+ (b'#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n', b''),
)
)
def test_not_ok_inputs(input_str, output):