Merge pull request #58 from dupuy/markdown-trailing-whitespace

Implement Markdown trailing space line break preservation
diff --git a/README.md b/README.md
index b4c7ee8..7a919e6 100644
--- a/README.md
+++ b/README.md
@@ -23,23 +23,34 @@
 
 ### Hooks available
 
-- `autopep8-wrapper` - Runs autopep8 over python source. (You'll want `args: ['-i]` when using this hook, see `.pre-commit-config.yaml` for an example.)
+- `autopep8-wrapper` - Runs autopep8 over python source.
+    - Ignore PEP 8 violation types with `args: ['-i', '--ignore=E000,...']`,
+      see `.pre-commit-config.yaml` in this repository for an example.
 - `check-added-large-files` - Prevent giant files from being committed.
-- `check-case-conflict` - Check for files that would conflict in case-insensitive filesystems.
-- `check-docstring-first` - Checks a common error of defining a docstring after code.
+    - Specify what is "too large" with `args: ['--maxkb=123']` (default=500kB).
+- `check-case-conflict` - Check for files with names that would conflict on a
+  case-insensitive filesystem like MacOS HFS+ or Windows FAT.
+- `check-docstring-first` - Checks for a common error of placing code before
+  the docstring.
 - `check-json` - Attempts to load all json files to verify syntax.
 - `check-merge-conflict` - Check for files that contain merge conflict strings.
 - `check-xml` - Attempts to load all xml files to verify syntax.
 - `check-yaml` - Attempts to load all yaml files to verify syntax.
 - `debug-statements` - Check for pdb / ipdb / pudb statements in code.
-- `detect-private-key` - Checks for the existence of private keys
-- `double-quote-string-fixer` - This hook replaces double quoted strings with single quoted strings
+- `detect-private-key` - Checks for the existence of private keys.
+- `double-quote-string-fixer` - This hook replaces double quoted strings
+  with single quoted strings.
 - `end-of-file-fixer` - Makes sure files end in a newline and only a newline.
-- `flake8` - Run flake8 on your python files
-- `name-tests-test` - Assert that files in tests/ end in _test.py
-- `pyflakes` - Run pyflakes on your python files
+- `flake8` - Run flake8 on your python files.
+- `name-tests-test` - Assert that files in tests/ end in `_test.py`.
+    - Use `args: ['--django']` to match `test*.py` instead.
+- `pyflakes` - Run pyflakes on your python files.
 - `requirements-txt-fixer` - Sorts entries in requirements.txt
 - `trailing-whitespace` - Trims trailing whitespace.
+    - Markdown linebreak trailing spaces preserved for `.md` and `.markdown`;
+      use `args: ['--markdown-linebreak-ext=txt,text']` to add other extensions,
+      `args: ['--markdown-linebreak-ext=*']` to preserve them for all files,
+      or `args: ['--no-markdown-linebreak-ext']` to disable and always trim.
 
 ### As a standalone package
 
diff --git a/pre_commit_hooks/detect_private_key.py b/pre_commit_hooks/detect_private_key.py
index 98dfeda..215ad56 100644
--- a/pre_commit_hooks/detect_private_key.py
+++ b/pre_commit_hooks/detect_private_key.py
@@ -1,10 +1,9 @@
 from __future__ import print_function
 
+import argparse
 import io
 import sys
 
-import argparse
-
 
 def detect_private_key(argv=None):
     parser = argparse.ArgumentParser()
diff --git a/pre_commit_hooks/trailing_whitespace_fixer.py b/pre_commit_hooks/trailing_whitespace_fixer.py
index 0642ac0..c159071 100644
--- a/pre_commit_hooks/trailing_whitespace_fixer.py
+++ b/pre_commit_hooks/trailing_whitespace_fixer.py
@@ -2,18 +2,44 @@
 
 import argparse
 import fileinput
+import os
 import sys
 
 from pre_commit_hooks.util import cmd_output
 
 
-def _fix_file(filename):
+def _fix_file(filename, markdown=False):
     for line in fileinput.input([filename], inplace=True):
+        # preserve trailing two-space for non-blank lines in markdown files
+        if markdown and (not line.isspace()) and (line.endswith("  \n")):
+            line = line.rstrip(' \n')
+            # only preserve if there are no trailing tabs or unusual whitespace
+            if not line[-1].isspace():
+                print(line + "  ")
+                continue
+
         print(line.rstrip())
 
 
 def fix_trailing_whitespace(argv=None):
     parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--no-markdown-linebreak-ext',
+        action='store_const',
+        const=[],
+        default=argparse.SUPPRESS,
+        dest='markdown_linebreak_ext',
+        help='Do not preserve linebreak spaces in Markdown'
+    )
+    parser.add_argument(
+        '--markdown-linebreak-ext',
+        action='append',
+        const='',
+        default=['md,markdown'],
+        metavar='*|EXT[,EXT,...]',
+        nargs='?',
+        help='Markdown extensions (or *) for linebreak spaces'
+    )
     parser.add_argument('filenames', nargs='*', help='Filenames to fix')
     args = parser.parse_args(argv)
 
@@ -21,10 +47,28 @@
         'grep', '-l', '[[:space:]]$', *args.filenames, retcode=None
     ).strip().splitlines()
 
+    md_args = args.markdown_linebreak_ext
+    if '' in md_args:
+        parser.error('--markdown-linebreak-ext requires a non-empty argument')
+    all_markdown = '*' in md_args
+    # normalize all extensions; split at ',', lowercase, and force 1 leading '.'
+    md_exts = ['.' + x.lower().lstrip('.')
+               for x in ','.join(md_args).split(',')]
+
+    # reject probable "eaten" filename as extension (skip leading '.' with [1:])
+    for ext in md_exts:
+        if any(c in ext[1:] for c in r'./\:'):
+            parser.error(
+                "bad --markdown-linebreak-ext extension '{0}' (has . / \\ :)\n"
+                "  (probably filename; use '--markdown-linebreak-ext=EXT')"
+                .format(ext)
+            )
+
     if bad_whitespace_files:
         for bad_whitespace_file in bad_whitespace_files:
             print('Fixing {0}'.format(bad_whitespace_file))
-            _fix_file(bad_whitespace_file)
+            _, extension = os.path.splitext(bad_whitespace_file.lower())
+            _fix_file(bad_whitespace_file, all_markdown or extension in md_exts)
         return 1
     else:
         return 0
diff --git a/pylintrc b/pylintrc
index bbd11ba..c905a37 100644
--- a/pylintrc
+++ b/pylintrc
@@ -1,5 +1,5 @@
 [MESSAGES CONTROL]
-disable=bad-open-mode,invalid-name,missing-docstring,redefined-outer-name,star-args,locally-disabled
+disable=bad-open-mode,invalid-name,missing-docstring,redefined-outer-name,star-args,locally-disabled,locally-enabled
 
 [REPORTS]
 output-format=colorized
diff --git a/tests/trailing_whitespace_fixer_test.py b/tests/trailing_whitespace_fixer_test.py
index e24a722..4d56762 100644
--- a/tests/trailing_whitespace_fixer_test.py
+++ b/tests/trailing_whitespace_fixer_test.py
@@ -1,6 +1,10 @@
 from __future__ import absolute_import
 from __future__ import unicode_literals
 
+import sys
+
+import pytest
+
 from pre_commit_hooks.trailing_whitespace_fixer import fix_trailing_whitespace
 from testing.util import cwd
 
@@ -12,7 +16,7 @@
                 ('bar.py', 'bar\t\nbaz\t\n'),
         ):
             with open(filename, 'w') as file_obj:
-                file_obj.write(contents)  # pragma: no cover (26 coverage bug)
+                file_obj.write(contents)  # pragma: no branch (26 coverage bug)
 
         ret = fix_trailing_whitespace(['foo.py', 'bar.py'])
         assert ret == 1
@@ -24,5 +28,103 @@
             assert open(filename).read() == after_contents
 
 
+# filename, input, expected output
+# pylint: disable=bad-whitespace
+MD_TESTS_1 = (
+    ('foo.md',        'foo  \nbar \n  ',         'foo  \nbar\n\n'),
+    ('bar.Markdown',  'bar   \nbaz\t\n\t\n',     'bar  \nbaz\n\n'),
+    ('.md',           'baz   \nquux  \t\n\t\n',  'baz\nquux\n\n'),
+    ('txt',           'foo   \nbaz \n\t\n',      'foo\nbaz\n\n'),
+)
+# pylint: enable=bad-whitespace
+
+
+@pytest.mark.parametrize(('filename', 'input_s', 'output'), MD_TESTS_1)
+def test_fixes_trailing_markdown_whitespace(filename, input_s, output, tmpdir):
+    with cwd(tmpdir.strpath):
+        with open(filename, 'w') as file_obj:
+            file_obj.write(input_s)  # pragma: no branch (26 coverage bug)
+
+        ret = fix_trailing_whitespace([filename])
+        assert ret == 1
+        assert open(filename).read() == output
+
+
+# filename, input, expected output
+# pylint: disable=bad-whitespace
+MD_TESTS_2 = (
+    ('foo.txt',       'foo  \nbar \n  \n',       'foo  \nbar\n\n'),
+    ('bar.Markdown',  'bar   \nbaz\t\n\t\n',     'bar  \nbaz\n\n'),
+    ('bar.MD',        'bar   \nbaz\t   \n\t\n',  'bar  \nbaz\n\n'),
+    ('.txt',          'baz   \nquux  \t\n\t\n',  'baz\nquux\n\n'),
+    ('txt',           'foo   \nbaz \n\t\n',      'foo\nbaz\n\n'),
+)
+# pylint: enable=bad-whitespace
+
+
+@pytest.mark.parametrize(('filename', 'input_s', 'output'), MD_TESTS_2)
+def test_markdown_linebreak_ext_opt(filename, input_s, output, tmpdir):
+    with cwd(tmpdir.strpath):
+        with open(filename, 'w') as file_obj:
+            file_obj.write(input_s)  # pragma: no branch (26 coverage bug)
+
+        ret = fix_trailing_whitespace(['--markdown-linebreak-ext=TxT',
+                                       filename])
+        assert ret == 1
+        assert open(filename).read() == output
+
+
+# filename, input, expected output
+# pylint: disable=bad-whitespace
+MD_TESTS_3 = (
+    ('foo.baz',       'foo  \nbar \n  ',         'foo  \nbar\n\n'),
+    ('bar',           'bar   \nbaz\t\n\t\n',     'bar  \nbaz\n\n'),
+)
+# pylint: enable=bad-whitespace
+
+
+@pytest.mark.parametrize(('filename', 'input_s', 'output'), MD_TESTS_3)
+def test_markdown_linebreak_ext_opt_all(filename, input_s, output, tmpdir):
+    with cwd(tmpdir.strpath):
+        with open(filename, 'w') as file_obj:
+            file_obj.write(input_s)  # pragma: no branch (26 coverage bug)
+
+        # need to make sure filename is not treated as argument to option
+        ret = fix_trailing_whitespace(['--markdown-linebreak-ext=*',
+                                       filename])
+        assert ret == 1
+        assert open(filename).read() == output
+
+
+@pytest.mark.parametrize(('arg'), ('--', 'a.b', 'a/b'))
+def test_markdown_linebreak_ext_badopt(arg):
+    try:
+        ret = fix_trailing_whitespace(['--markdown-linebreak-ext', arg])
+    except SystemExit:
+        ret = sys.exc_info()[1].code
+    finally:
+        assert ret == 2
+
+
+# filename, input, expected output
+# pylint: disable=bad-whitespace
+MD_TESTS_4 = (
+    ('bar.md',        'bar   \nbaz\t   \n\t\n',  'bar\nbaz\n\n'),
+    ('bar.markdown',  'baz   \nquux  \n',        'baz\nquux\n'),
+)
+# pylint: enable=bad-whitespace
+
+
+@pytest.mark.parametrize(('filename', 'input_s', 'output'), MD_TESTS_4)
+def test_no_markdown_linebreak_ext_opt(filename, input_s, output, tmpdir):
+    with cwd(tmpdir.strpath):
+        with open(filename, 'w') as file_obj:
+            file_obj.write(input_s)  # pragma: no branch (26 coverage bug)
+
+        ret = fix_trailing_whitespace(['--no-markdown-linebreak-ext', filename])
+        assert ret == 1
+        assert open(filename).read() == output
+
+
 def test_returns_zero_for_no_changes():
     assert fix_trailing_whitespace([__file__]) == 0