Merge pull request #107 from pre-commit/utf8_bom

Forbid files with a UTF-8 BOM
diff --git a/README.md b/README.md
index b13b697..773fb81 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,7 @@
 - `check-added-large-files` - Prevent giant files from being committed.
     - Specify what is "too large" with `args: ['--maxkb=123']` (default=500kB).
 - `check-ast` - Simply check whether files parse as valid python.
+- `check-byte-order-marker` - Forbid files which have a UTF-8 byte-order marker.
 - `check-case-conflict` - Check for files with names that would conflict on a
   case-insensitive filesystem like MacOS HFS+ or Windows FAT.
 - `check-docstring-first` - Checks for a common error of placing code before
diff --git a/hooks.yaml b/hooks.yaml
index 1253649..08f01c5 100644
--- a/hooks.yaml
+++ b/hooks.yaml
@@ -18,6 +18,12 @@
     entry: check-ast
     language: python
     files: '\.py$'
+-   id: check-byte-order-marker
+    name: Check for byte-order marker
+    description: Forbid files which have a UTF-8 byte-order marker
+    entry: check-byte-order-marker
+    language: python
+    files: '\.py$'
 -   id: check-case-conflict
     name: Check for case conflicts
     description: Check for files that would conflict in case-insensitive filesystems
diff --git a/pre_commit_hooks/check_byte_order_marker.py b/pre_commit_hooks/check_byte_order_marker.py
new file mode 100644
index 0000000..274f949
--- /dev/null
+++ b/pre_commit_hooks/check_byte_order_marker.py
@@ -0,0 +1,25 @@
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+
+
+def main(argv=None):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filenames', nargs='*', help='Filenames to check')
+    args = parser.parse_args(argv)
+
+    retv = 0
+
+    for filename in args.filenames:
+        with open(filename, 'rb') as f:
+            if f.read(3) == b'\xef\xbb\xbf':
+                retv = 1
+                print('{0}: Has a byte-order marker'.format(filename))
+
+    return retv
+
+
+if __name__ == '__main__':
+    exit(main())
diff --git a/setup.py b/setup.py
index 019e10b..5052642 100644
--- a/setup.py
+++ b/setup.py
@@ -39,6 +39,7 @@
             'autopep8-wrapper = pre_commit_hooks.autopep8_wrapper:main',
             'check-added-large-files = pre_commit_hooks.check_added_large_files:main',
             'check-ast = pre_commit_hooks.check_ast:check_ast',
+            'check-byte-order-marker = pre_commit_hooks.check_byte_order_marker:main',
             'check-case-conflict = pre_commit_hooks.check_case_conflict:main',
             'check-docstring-first = pre_commit_hooks.check_docstring_first:main',
             'check-json = pre_commit_hooks.check_json:check_json',
diff --git a/tests/check_byte_order_marker_test.py b/tests/check_byte_order_marker_test.py
new file mode 100644
index 0000000..53cb4a1
--- /dev/null
+++ b/tests/check_byte_order_marker_test.py
@@ -0,0 +1,16 @@
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+from pre_commit_hooks import check_byte_order_marker
+
+
+def test_failure(tmpdir):
+    f = tmpdir.join('f.txt')
+    f.write_text('ohai', encoding='utf-8-sig')
+    assert check_byte_order_marker.main((f.strpath,)) == 1
+
+
+def test_success(tmpdir):
+    f = tmpdir.join('f.txt')
+    f.write_text('ohai', encoding='utf-8')
+    assert check_byte_order_marker.main((f.strpath,)) == 0