Add encoding pragma hook. Resolves pre-commit/pre-commit#15
diff --git a/README.md b/README.md
index 8148cee..9229f8f 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,7 @@
- `double-quote-string-fixer` - This hook replaces double quoted strings
with single quoted strings.
- `end-of-file-fixer` - Makes sure files end in a newline and only a newline.
+- `fix-encoding-pragma` - Add `# -*- coding: utf-8 -*-` to the top of python files.
- `flake8` - Run flake8 on your python files.
- `name-tests-test` - Assert that files in tests/ end in `_test.py`.
- Use `args: ['--django']` to match `test*.py` instead.
diff --git a/hooks.yaml b/hooks.yaml
index 7817d1b..d4ef521 100644
--- a/hooks.yaml
+++ b/hooks.yaml
@@ -92,6 +92,12 @@
entry: end-of-file-fixer
language: python
files: \.(c|cpp|html|erb|slim|haml|ejs|jade|js|coffee|json|rb|md|py|css|scss|less|sh|tmpl|txt|yaml|yml|pp)$
+- id: fix-encoding-pragma
+ name: Fix python encoding pragma
+ language: python
+ entry: fix-encoding-pragma
+ description: 'Add # -*- coding: utf-8 -*- to the top of python files'
+ files: \.py$
- id: flake8
name: Flake8
description: This hook runs flake8.
diff --git a/pre_commit_hooks/fix_encoding_pragma.py b/pre_commit_hooks/fix_encoding_pragma.py
new file mode 100644
index 0000000..48fc9c7
--- /dev/null
+++ b/pre_commit_hooks/fix_encoding_pragma.py
@@ -0,0 +1,75 @@
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+import io
+
+# Canonical pragma line (bytes, newline-terminated): the fixer compares the
+# leading lines of a file against it and writes it when it is missing.
+expected_pragma = b'# -*- coding: utf-8 -*-\n'
+
+
+def has_coding(line):
+    """Return True if ``line`` looks like a PEP 263 encoding declaration.
+
+    A declaration is a comment line (optionally preceded by whitespace)
+    that contains ``coding:`` or ``coding=``.  This covers the usual
+    spellings such as ``# -*- coding: latin-1 -*-``, ``# encoding: utf-8``
+    and ``# vim: set fileencoding=utf-8 :``.
+
+    Comments that merely mention the words "unicode" or "encoding" are not
+    declarations and are deliberately not matched: ``fix_encoding_pragma``
+    replaces any line this function accepts, so a false positive would
+    silently delete an ordinary comment.
+
+    :param bytes line: one line read from a file opened in binary mode.
+    :returns: whether the line is an encoding declaration.
+    :rtype: bool
+    """
+    # Blank lines and code lines (even ones with a trailing comment) can
+    # never be an encoding declaration.
+    if not line.lstrip().startswith(b'#'):
+        return False
+    return b'coding:' in line or b'coding=' in line
+
+
+def fix_encoding_pragma(f):
+    """Make sure the file object ``f`` carries ``expected_pragma``.
+
+    The pragma has to be the first line, or the second line when the first
+    line is a shebang.  An existing encoding declaration in that position is
+    replaced; otherwise the pragma is inserted.  Empty (whitespace-only)
+    files are left untouched.
+
+    :param f: seekable binary file object opened for reading and writing,
+        e.g. ``io.open(path, 'r+b')`` or ``io.BytesIO``.
+    :returns: 0 when the file was already acceptable, 1 when it was
+        rewritten.
+    :rtype: int
+    """
+    first_line = f.readline()
+    second_line = f.readline()
+    rest = f.read()
+    f.seek(0)
+
+    # OK case: the file is empty
+    if not (first_line + second_line + rest).strip():
+        return 0
+
+    # OK case: the pragma is the first line
+    if first_line == expected_pragma:
+        return 0
+
+    has_shebang = first_line.startswith(b'#!')
+
+    # OK case: shebang on the first line and the pragma on the second
+    if has_shebang and second_line == expected_pragma:
+        return 0
+
+    # Otherwise the contents have to be rebuilt with the pragma in place.
+    if has_shebang:
+        # A shebang-only file may lack a trailing newline; without adding
+        # one the pragma would be glued onto the shebang line itself.
+        if not first_line.endswith(b'\n'):
+            first_line += b'\n'
+        if has_coding(second_line):
+            contents = first_line + expected_pragma + rest
+        else:
+            contents = first_line + expected_pragma + second_line + rest
+    elif has_coding(first_line):
+        contents = expected_pragma + second_line + rest
+    else:
+        contents = expected_pragma + first_line + second_line + rest
+
+    f.write(contents)
+    # Replacing a declaration that is longer than ``expected_pragma`` makes
+    # the new contents shorter than the old ones; without truncating, stale
+    # bytes of the previous contents would survive at the end of the file.
+    f.truncate()
+
+    return 1
+
+
+def main(argv=None):
+    """Command-line entry point: fix the encoding pragma of the given files.
+
+    :param argv: list of command-line arguments to parse; ``None`` lets
+        argparse fall back to ``sys.argv[1:]``.
+    :returns: 1 if at least one file was rewritten, 0 otherwise.
+    :rtype: int
+    """
+    # The text is a description.  Passed positionally it would be taken as
+    # ``prog`` and shown as the program name in the usage line.
+    parser = argparse.ArgumentParser(
+        description='Fixes the encoding pragma of python files',
+    )
+    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
+    args = parser.parse_args(argv)
+
+    # Decode once for display so the message does not show a bytes repr
+    # (``b'...'``) on Python 3.
+    pragma_text = expected_pragma.strip().decode('UTF-8')
+
+    retv = 0
+
+    for filename in args.filenames:
+        with io.open(filename, 'r+b') as f:
+            file_ret = fix_encoding_pragma(f)
+        retv |= file_ret
+        if file_ret:
+            print('Added `{0}` to {1}'.format(pragma_text, filename))
+
+    return retv
+
+if __name__ == "__main__":
+    # ``exit`` is a helper injected by the ``site`` module and is not
+    # guaranteed to exist (e.g. under ``python -S``); raising SystemExit
+    # needs no import and propagates main()'s return value as exit status.
+    raise SystemExit(main())
diff --git a/setup.py b/setup.py
index 4fefeaa..7779089 100644
--- a/setup.py
+++ b/setup.py
@@ -50,6 +50,7 @@
'detect-private-key = pre_commit_hooks.detect_private_key:detect_private_key',
'double-quote-string-fixer = pre_commit_hooks.string_fixer:main',
'end-of-file-fixer = pre_commit_hooks.end_of_file_fixer:end_of_file_fixer',
+ 'fix-encoding-pragma = pre_commit_hooks.fix_encoding_pragma:main',
'name-tests-test = pre_commit_hooks.tests_should_end_in_test:validate_files',
'pretty-format-json = pre_commit_hooks.pretty_format_json:pretty_format_json',
'requirements-txt-fixer = pre_commit_hooks.requirements_txt_fixer:fix_requirements_txt',
diff --git a/tests/fix_encoding_pragma_test.py b/tests/fix_encoding_pragma_test.py
new file mode 100644
index 0000000..e000a33
--- /dev/null
+++ b/tests/fix_encoding_pragma_test.py
@@ -0,0 +1,82 @@
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+import io
+
+import pytest
+
+from pre_commit_hooks.fix_encoding_pragma import fix_encoding_pragma
+from pre_commit_hooks.fix_encoding_pragma import main
+
+
+def test_integration_inserting_pragma(tmpdir):
+    """Running ``main`` on a file without a pragma adds it and returns 1."""
+    path = tmpdir.join('foo.py').strpath
+    expected = (
+        b'# -*- coding: utf-8 -*-\n'
+        b'import httplib\n'
+    )
+
+    with open(path, 'wb') as fobj:
+        fobj.write(b'import httplib\n')
+
+    retv = main([path])
+    assert retv == 1
+
+    with open(path, 'rb') as fobj:
+        contents = fobj.read()
+    assert contents == expected
+
+
+def test_integration_ok(tmpdir):
+    """A file that already starts with the pragma makes ``main`` return 0."""
+    path = tmpdir.join('foo.py').strpath
+
+    with open(path, 'wb') as fobj:
+        fobj.write(b'# -*- coding: utf-8 -*-\nx = 1\n')
+
+    retv = main([path])
+    assert retv == 0
+
+
+@pytest.mark.parametrize(
+    'input_str',
+    (
+        # empty file
+        b'',
+        # pragma on the first line
+        b'# -*- coding: utf-8 -*-\n',
+        # shebang first, pragma second
+        (
+            b'#!/usr/bin/env python\n'
+            b'# -*- coding: utf-8 -*-\n'
+            b'foo = "bar"\n'
+        ),
+    )
+)
+def test_ok_inputs(input_str):
+    """Acceptable inputs yield 0 and are left byte-for-byte unchanged."""
+    buf = io.BytesIO(input_str)
+    result = fix_encoding_pragma(buf)
+    assert result == 0
+    # getvalue() returns the whole buffer regardless of the stream position.
+    assert buf.getvalue() == input_str
+
+
+@pytest.mark.parametrize(
+    ('input_str', 'output'),
+    (
+        # no pragma at all: it is inserted as the first line
+        (
+            b'import httplib\n',
+            b'# -*- coding: utf-8 -*-\n'
+            b'import httplib\n',
+        ),
+        # shebang only: the pragma goes right after it
+        (
+            b'#!/usr/bin/env python\n',
+            b'#!/usr/bin/env python\n'
+            b'# -*- coding: utf-8 -*-\n'
+        ),
+        # differently spelled declaration on the first line is replaced
+        (
+            b'#coding=utf-8\n',
+            b'# -*- coding: utf-8 -*-\n'
+        ),
+        # differently spelled declaration after a shebang is replaced
+        (
+            b'#!/usr/bin/env python\n'
+            b'#coding=utf8\n',
+            b'#!/usr/bin/env python\n'
+            b'# -*- coding: utf-8 -*-\n',
+        ),
+    )
+)
+def test_not_ok_inputs(input_str, output):
+    """Inputs needing a fix yield 1 and end up with the expected bytes."""
+    buf = io.BytesIO(input_str)
+    result = fix_encoding_pragma(buf)
+    assert result == 1
+    # getvalue() returns the whole buffer regardless of the stream position.
+    assert buf.getvalue() == output