Merge pull request #522 from jgowdy/byte-order-marker-fix

Add fix-byte-order-marker hook to remove UTF-8 BOMs
diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
index 3e4dc9e..a47f733 100644
--- a/.pre-commit-hooks.yaml
+++ b/.pre-commit-hooks.yaml
@@ -17,8 +17,8 @@
     language: python
     types: [python]
 -   id: check-byte-order-marker
-    name: Check for byte-order marker
-    description: Forbid files which have a UTF-8 byte-order marker
+    name: 'check BOM - deprecated: use fix-byte-order-marker'
+    description: forbid files which have a UTF-8 byte-order marker
     entry: check-byte-order-marker
     language: python
     types: [text]
@@ -131,6 +131,12 @@
     entry: file-contents-sorter
     language: python
     files: '^$'
+-   id: fix-byte-order-marker
+    name: fix UTF-8 byte order marker
+    description: removes UTF-8 byte order marker
+    entry: fix-byte-order-marker
+    language: python
+    types: [text]
 -   id: fix-encoding-pragma
     name: Fix python encoding pragma
     language: python
diff --git a/README.md b/README.md
index a6b62ab..18340bf 100644
--- a/README.md
+++ b/README.md
@@ -42,9 +42,6 @@
   - Ignore this requirement for specific builtin types with `--ignore=type1,type2,…`.
   - Forbid `dict` keyword syntax with `--no-allow-dict-kwargs`.
 
-#### `check-byte-order-marker`
-Forbid files which have a UTF-8 byte-order marker
-
 #### `check-case-conflict`
 Check for files with names that would conflict on a case-insensitive filesystem like MacOS HFS+ or Windows FAT.
 
@@ -102,6 +99,9 @@
 #### `end-of-file-fixer`
 Makes sure files end in a newline and only a newline.
 
+#### `fix-byte-order-marker`
+Removes the UTF-8 byte-order marker from files that start with one.
+
 #### `fix-encoding-pragma`
 Add `# -*- coding: utf-8 -*-` to the top of python files.
   - To remove the coding pragma pass `--remove` (useful in a python3-only codebase)
@@ -183,6 +183,7 @@
   [mirrors-autopep8](https://github.com/pre-commit/mirrors-autopep8)
 - `pyflakes`: instead use `flake8`
 - `flake8`: instead use [upstream flake8](https://gitlab.com/pycqa/flake8)
+- `check-byte-order-marker`: instead use `fix-byte-order-marker`
 
 ### As a standalone package
 
diff --git a/pre_commit_hooks/fix_byte_order_marker.py b/pre_commit_hooks/fix_byte_order_marker.py
new file mode 100644
index 0000000..1ffe047
--- /dev/null
+++ b/pre_commit_hooks/fix_byte_order_marker.py
@@ -0,0 +1,30 @@
+import argparse
+from typing import Optional
+from typing import Sequence
+
+
+def main(argv: Optional[Sequence[str]] = None) -> int:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filenames', nargs='*', help='Filenames to check')
+    args = parser.parse_args(argv)
+
+    retv = 0
+
+    for filename in args.filenames:
+        with open(filename, 'rb') as f_b:
+            bts = f_b.read(3)
+
+        if bts == b'\xef\xbb\xbf':
+            with open(filename, newline='', encoding='utf-8-sig') as f:
+                contents = f.read()
+            with open(filename, 'w', newline='', encoding='utf-8') as f:
+                f.write(contents)
+
+            print(f'{filename}: removed byte-order marker')
+            retv = 1
+
+    return retv
+
+
+if __name__ == '__main__':
+    exit(main())
diff --git a/setup.cfg b/setup.cfg
index 47b8bb6..c8677f5 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -48,6 +48,7 @@
     double-quote-string-fixer = pre_commit_hooks.string_fixer:main
     end-of-file-fixer = pre_commit_hooks.end_of_file_fixer:main
     file-contents-sorter = pre_commit_hooks.file_contents_sorter:main
+    fix-byte-order-marker = pre_commit_hooks.fix_byte_order_marker:main
     fix-encoding-pragma = pre_commit_hooks.fix_encoding_pragma:main
     forbid-new-submodules = pre_commit_hooks.forbid_new_submodules:main
     mixed-line-ending = pre_commit_hooks.mixed_line_ending:main
diff --git a/tests/fix_byte_order_marker_test.py b/tests/fix_byte_order_marker_test.py
new file mode 100644
index 0000000..da150e3
--- /dev/null
+++ b/tests/fix_byte_order_marker_test.py
@@ -0,0 +1,13 @@
+from pre_commit_hooks import fix_byte_order_marker
+
+
+def test_failure(tmpdir):
+    f = tmpdir.join('f.txt')
+    f.write_text('ohai', encoding='utf-8-sig')
+    assert fix_byte_order_marker.main((str(f),)) == 1
+
+
+def test_success(tmpdir):
+    f = tmpdir.join('f.txt')
+    f.write_text('ohai', encoding='utf-8')
+    assert fix_byte_order_marker.main((str(f),)) == 0