Simplify mixed-line-ending hook
diff --git a/pre_commit_hooks/mixed_line_ending.py b/pre_commit_hooks/mixed_line_ending.py
index 76512a7..301c654 100644
--- a/pre_commit_hooks/mixed_line_ending.py
+++ b/pre_commit_hooks/mixed_line_ending.py
@@ -1,212 +1,83 @@
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import unicode_literals
+
import argparse
-import re
-import sys
-
-from enum import Enum
+import collections
-class LineEnding(Enum):
- CR = b'\r', 'cr', re.compile(b'\r(?!\n)', re.DOTALL)
- CRLF = b'\r\n', 'crlf', re.compile(b'\r\n', re.DOTALL)
- LF = b'\n', 'lf', re.compile(b'(?<!\r)\n', re.DOTALL)
-
- def __init__(self, string, opt_name, regex):
- self.string = string
- self.str_print = repr(string)
- self.opt_name = opt_name
- self.regex = regex
+CRLF = b'\r\n'
+LF = b'\n'
+CR = b'\r'
+# Prefer LF to CRLF to CR, but detect CRLF before LF
+ALL_ENDINGS = (CR, CRLF, LF)
+FIX_TO_LINE_ENDING = {'cr': CR, 'crlf': CRLF, 'lf': LF}
-class MixedLineEndingOption(Enum):
- AUTO = 'auto', None
- NO = 'no', None
- CRLF = LineEnding.CRLF.opt_name, LineEnding.CRLF
- LF = LineEnding.LF.opt_name, LineEnding.LF
-
- def __init__(self, opt_name, line_ending_enum):
- self.opt_name = opt_name
- self.line_ending_enum = line_ending_enum
+def _fix(filename, contents, ending):
+ new_contents = b''.join(
+ line.rstrip(b'\r\n') + ending for line in contents.splitlines(True)
+ )
+ with open(filename, 'wb') as f:
+ f.write(new_contents)
-class MixedLineDetection(Enum):
- NOT_MIXED = 1, False, None
- UNKNOWN = 2, False, None
- MIXED_MOSTLY_CRLF = 3, True, LineEnding.CRLF
- MIXED_MOSTLY_LF = 4, True, LineEnding.LF
- MIXED_MOSTLY_CR = 5, True, LineEnding.CR
+def fix_filename(filename, fix):
+ with open(filename, 'rb') as f:
+ contents = f.read()
- def __init__(self, index, mle_found, line_ending_enum):
- # TODO hack to prevent enum overriding
- self.index = index
- self.mle_found = mle_found
- self.line_ending_enum = line_ending_enum
+ counts = collections.defaultdict(int)
+ for line in contents.splitlines(True):
+ for ending in ALL_ENDINGS:
+ if line.endswith(ending):
+ counts[ending] += 1
+ break
-ANY_LINE_ENDING_PATTERN = re.compile(
- b'(' + LineEnding.CRLF.regex.pattern +
- b'|' + LineEnding.LF.regex.pattern +
- b'|' + LineEnding.CR.regex.pattern + b')',
-)
+ # Some amount of mixed line endings
+ mixed = sum(bool(x) for x in counts.values()) > 1
+ if fix == 'no' or (fix == 'auto' and not mixed):
+ return mixed
-def mixed_line_ending(argv=None):
- options = _parse_arguments(argv)
+ if fix == 'auto':
+ max_ending = LF
+ max_lines = 0
+ # ordering is important here such that lf > crlf > cr
+ for ending_type in ALL_ENDINGS:
+ # also important, using >= to find a max that prefers the last
+ if counts[ending_type] >= max_lines:
+ max_ending = ending_type
+ max_lines = counts[ending_type]
- filenames = options['filenames']
- fix_option = options['fix']
-
- if fix_option == MixedLineEndingOption.NO:
- return _process_no_fix(filenames)
- elif fix_option == MixedLineEndingOption.AUTO:
- return _process_fix_auto(filenames)
- # when a line ending character is forced with --fix option
+ _fix(filename, contents, max_ending)
+ return 1
else:
- return _process_fix_force(filenames, fix_option.line_ending_enum)
+ target_ending = FIX_TO_LINE_ENDING[fix]
+ # find if there are lines with *other* endings
+ del counts[target_ending]
+ other_endings = bool(sum(counts.values()))
+ if other_endings:
+ _fix(filename, contents, target_ending)
+ return other_endings
-def _parse_arguments(argv=None):
+def main(argv=None):
parser = argparse.ArgumentParser()
parser.add_argument(
- '-f',
- '--fix',
- choices=[m.opt_name for m in MixedLineEndingOption],
- default=MixedLineEndingOption.AUTO.opt_name,
+ '-f', '--fix',
+ choices=('auto', 'no') + tuple(FIX_TO_LINE_ENDING),
+ default='auto',
help='Replace line ending with the specified. Default is "auto"',
)
parser.add_argument('filenames', nargs='*', help='Filenames to fix')
args = parser.parse_args(argv)
- fix, = (
- member for name, member
- in MixedLineEndingOption.__members__.items()
- if member.opt_name == args.fix
- )
-
- options = {
- 'fix': fix, 'filenames': args.filenames,
- }
-
- return options
-
-
-def _detect_line_ending(filename):
- with open(filename, 'rb') as f:
- buf = f.read()
-
- le_counts = {}
-
- for le_enum in LineEnding:
- le_counts[le_enum] = len(le_enum.regex.findall(buf))
-
- mixed = False
- le_found_previously = False
- most_le = None
- max_le_count = 0
-
- for le, le_count in le_counts.items():
- le_found_cur = le_count > 0
-
- mixed |= le_found_previously and le_found_cur
- le_found_previously |= le_found_cur
-
- if le_count == max_le_count:
- most_le = None
- elif le_count > max_le_count:
- max_le_count = le_count
- most_le = le
-
- if not mixed:
- return MixedLineDetection.NOT_MIXED
-
- for mld in MixedLineDetection:
- if (
- mld.line_ending_enum is not None and
- mld.line_ending_enum == most_le
- ):
- return mld
-
- return MixedLineDetection.UNKNOWN
-
-
-def _process_no_fix(filenames):
- print('Checking if the files have mixed line ending.')
-
- mle_filenames = []
- for filename in filenames:
- detect_result = _detect_line_ending(filename)
-
- if detect_result.mle_found:
- mle_filenames.append(filename)
-
- mle_found = len(mle_filenames) > 0
-
- if mle_found:
- print(
- 'The following files have mixed line endings:\n\t%s',
- '\n\t'.join(mle_filenames),
- )
-
- return 1 if mle_found else 0
-
-
-def _process_fix_auto(filenames):
- mle_found = False
-
- for filename in filenames:
- detect_result = _detect_line_ending(filename)
-
- if detect_result == MixedLineDetection.NOT_MIXED:
- print('The file %s has no mixed line ending', filename)
- elif detect_result == MixedLineDetection.UNKNOWN:
- print(
- 'Could not define most frequent line ending in '
- 'file %s. File skiped.', filename,
- )
-
- mle_found = True
- else:
- le_enum = detect_result.line_ending_enum
-
- print(
- 'The file %s has mixed line ending with a '
- 'majority of %s. Converting...', filename, le_enum.str_print,
- )
-
- _convert_line_ending(filename, le_enum.string)
- mle_found = True
-
- print(
- 'The file %s has been converted to %s line ending.',
- filename, le_enum.str_print,
- )
-
- return 1 if mle_found else 0
-
-
-def _process_fix_force(filenames, line_ending_enum):
- for filename in filenames:
- _convert_line_ending(filename, line_ending_enum.string)
-
- print(
- 'The file %s has been forced to %s line ending.',
- filename, line_ending_enum.str_print,
- )
-
- return 1
-
-
-def _convert_line_ending(filename, line_ending):
- with open(filename, 'rb+') as f:
- bufin = f.read()
-
- # convert line ending
- bufout = ANY_LINE_ENDING_PATTERN.sub(line_ending, bufin)
-
- # write the result in the file replacing the existing content
- f.seek(0)
- f.write(bufout)
- f.truncate()
+ retv = 0
+ for filename in args.filenames:
+ retv |= fix_filename(filename, args.fix)
+ return retv
if __name__ == '__main__':
- sys.exit(mixed_line_ending())
+ exit(main())
diff --git a/setup.py b/setup.py
index 432c19a..2563095 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,6 @@
'simplejson',
'six',
],
- extras_require={':python_version=="2.7"': ['enum34']},
entry_points={
'console_scripts': [
'autopep8-wrapper = pre_commit_hooks.autopep8_wrapper:main',
diff --git a/tests/mixed_line_ending_test.py b/tests/mixed_line_ending_test.py
index 20fd22c..808295b 100644
--- a/tests/mixed_line_ending_test.py
+++ b/tests/mixed_line_ending_test.py
@@ -1,154 +1,103 @@
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
import pytest
-from pre_commit_hooks.mixed_line_ending import mixed_line_ending
-
-# Input, expected return value, expected output
-TESTS_FIX_AUTO = (
- # only 'LF'
- (b'foo\nbar\nbaz\n', 0, b'foo\nbar\nbaz\n'),
- # only 'CRLF'
- (b'foo\r\nbar\r\nbaz\r\n', 0, b'foo\r\nbar\r\nbaz\r\n'),
- # only 'CR'
- (b'foo\rbar\rbaz\r', 0, b'foo\rbar\rbaz\r'),
- # mixed with majority of 'LF'
- (b'foo\r\nbar\nbaz\n', 1, b'foo\nbar\nbaz\n'),
- # mixed with majority of 'CRLF'
- (b'foo\r\nbar\nbaz\r\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
- # mixed with majority of 'CR'
- (b'foo\rbar\nbaz\r', 1, b'foo\rbar\rbaz\r'),
- # mixed with as much 'LF' as 'CRLF'
- (b'foo\r\nbar\nbaz', 1, b'foo\r\nbar\nbaz'),
- # mixed with as much 'LF' as 'CR'
- (b'foo\rbar\nbaz', 1, b'foo\rbar\nbaz'),
- # mixed with as much 'CRLF' as 'CR'
- (b'foo\r\nbar\nbaz', 1, b'foo\r\nbar\nbaz'),
- # mixed with as much 'CRLF' as 'LF' as 'CR'
- (b'foo\r\nbar\nbaz\r', 1, b'foo\r\nbar\nbaz\r'),
-)
+from pre_commit_hooks.mixed_line_ending import main
@pytest.mark.parametrize(
- ('input_s', 'expected_retval', 'output'),
- TESTS_FIX_AUTO,
+ ('input_s', 'output'),
+ (
+ # mixed with majority of 'LF'
+ (b'foo\r\nbar\nbaz\n', b'foo\nbar\nbaz\n'),
+ # mixed with majority of 'CRLF'
+ (b'foo\r\nbar\nbaz\r\n', b'foo\r\nbar\r\nbaz\r\n'),
+ # mixed with majority of 'CR'
+ (b'foo\rbar\nbaz\r', b'foo\rbar\rbaz\r'),
+ # mixed with as much 'LF' as 'CRLF'
+ (b'foo\r\nbar\n', b'foo\nbar\n'),
+ # mixed with as much 'LF' as 'CR'
+ (b'foo\rbar\n', b'foo\nbar\n'),
+ # mixed with as much 'CRLF' as 'CR'
+ (b'foo\r\nbar\r', b'foo\r\nbar\r\n'),
+ # mixed with as much 'CRLF' as 'LF' as 'CR'
+ (b'foo\r\nbar\nbaz\r', b'foo\nbar\nbaz\n'),
+ ),
)
-def test_mixed_line_ending_fix_auto(input_s, expected_retval, output, tmpdir):
+def test_mixed_line_ending_fixes_auto(input_s, output, tmpdir):
path = tmpdir.join('file.txt')
path.write_binary(input_s)
- ret = mixed_line_ending(('--fix=auto', path.strpath))
+ ret = main((path.strpath,))
- assert ret == expected_retval
+ assert ret == 1
assert path.read_binary() == output
-# Input, expected return value, expected output
-TESTS_NO_FIX = (
- # only 'LF'
- (b'foo\nbar\nbaz\n', 0, b'foo\nbar\nbaz\n'),
- # only 'CRLF'
- (b'foo\r\nbar\r\nbaz\r\n', 0, b'foo\r\nbar\r\nbaz\r\n'),
- # only 'CR'
- (b'foo\rbar\rbaz\r', 0, b'foo\rbar\rbaz\r'),
- # mixed with majority of 'LF'
- (b'foo\r\nbar\nbaz\n', 1, b'foo\r\nbar\nbaz\n'),
- # mixed with majority of 'CRLF'
- (b'foo\r\nbar\nbaz\r\n', 1, b'foo\r\nbar\nbaz\r\n'),
- # mixed with majority of 'CR'
- (b'foo\rbar\nbaz\r', 1, b'foo\rbar\nbaz\r'),
- # mixed with as much 'LF' as 'CR'
- (b'foo\rbar\nbaz', 0, b'foo\rbar\nbaz'),
- # mixed with as much 'CRLF' as 'CR'
- (b'foo\r\nbar\nbaz', 0, b'foo\r\nbar\nbaz'),
- # mixed with as much 'CRLF' as 'LF' as 'CR'
- (b'foo\r\nbar\nbaz\r', 0, b'foo\r\nbar\nbaz\r'),
-)
+def test_non_mixed_no_newline_end_of_file(tmpdir):
+ path = tmpdir.join('f.txt')
+ path.write_binary(b'foo\nbar\nbaz')
+ assert not main((path.strpath,))
+ # the hook *could* fix the end of the file, but leaves it alone
+ # this is mostly to document the current behaviour
+ assert path.read_binary() == b'foo\nbar\nbaz'
+
+
+def test_mixed_no_newline_end_of_file(tmpdir):
+ path = tmpdir.join('f.txt')
+ path.write_binary(b'foo\r\nbar\nbaz')
+ assert main((path.strpath,))
+ # the hook rewrites the end of the file, this is slightly inconsistent
+ # with the non-mixed case but I think this is the better behaviour
+ # this is mostly to document the current behaviour
+ assert path.read_binary() == b'foo\nbar\nbaz\n'
@pytest.mark.parametrize(
- ('input_s', 'expected_retval', 'output'),
- TESTS_NO_FIX,
+ ('fix_option', 'input_s'),
+ (
+ # All --fix=auto with uniform line endings should be ok
+ ('--fix=auto', b'foo\r\nbar\r\nbaz\r\n'),
+ ('--fix=auto', b'foo\rbar\rbaz\r'),
+ ('--fix=auto', b'foo\nbar\nbaz\n'),
+ # --fix=crlf with crlf endings
+ ('--fix=crlf', b'foo\r\nbar\r\nbaz\r\n'),
+ # --fix=lf with lf endings
+ ('--fix=lf', b'foo\nbar\nbaz\n'),
+ ),
)
-def test_detect_mixed_line_ending(input_s, expected_retval, output, tmpdir):
- path = tmpdir.join('file.txt')
+def test_line_endings_ok(fix_option, input_s, tmpdir):
+ path = tmpdir.join('input.txt')
path.write_binary(input_s)
- ret = mixed_line_ending(('--fix=no', path.strpath))
+ ret = main((fix_option, path.strpath))
- assert ret == expected_retval
- assert path.read_binary() == output
+ assert ret == 0
+ assert path.read_binary() == input_s
-# Input, expected return value, expected output
-TESTS_FIX_FORCE_LF = (
- # only 'LF'
- (b'foo\nbar\nbaz\n', 1, b'foo\nbar\nbaz\n'),
- # only 'CRLF'
- (b'foo\r\nbar\r\nbaz\r\n', 1, b'foo\nbar\nbaz\n'),
- # only 'CR'
- (b'foo\rbar\rbaz\r', 1, b'foo\nbar\nbaz\n'),
- # mixed with majority of 'LF'
- (b'foo\r\nbar\nbaz\n', 1, b'foo\nbar\nbaz\n'),
- # mixed with majority of 'CRLF'
- (b'foo\r\nbar\nbaz\r\n', 1, b'foo\nbar\nbaz\n'),
- # mixed with majority of 'CR'
- (b'foo\rbar\nbaz\r', 1, b'foo\nbar\nbaz\n'),
- # mixed with as much 'LF' as 'CR'
- (b'foo\rbar\nbaz', 1, b'foo\nbar\nbaz'),
- # mixed with as much 'CRLF' as 'CR'
- (b'foo\r\nbar\nbaz', 1, b'foo\nbar\nbaz'),
- # mixed with as much 'CRLF' as 'LF' as 'CR'
- (b'foo\r\nbar\nbaz\r', 1, b'foo\nbar\nbaz\n'),
-)
+def test_no_fix_does_not_modify(tmpdir):
+ path = tmpdir.join('input.txt')
+ contents = b'foo\r\nbar\rbaz\nwomp\n'
+ path.write_binary(contents)
+ ret = main(('--fix=no', path.strpath))
+
+ assert ret == 1
+ assert path.read_binary() == contents
-@pytest.mark.parametrize(
- ('input_s', 'expected_retval', 'output'),
- TESTS_FIX_FORCE_LF,
-)
-def test_mixed_line_ending_fix_force_lf(
- input_s, expected_retval, output,
- tmpdir,
-):
- path = tmpdir.join('file.txt')
- path.write_binary(input_s)
- ret = mixed_line_ending(('--fix=lf', path.strpath))
+def test_fix_lf(tmpdir):
+ path = tmpdir.join('input.txt')
+ path.write_binary(b'foo\r\nbar\rbaz\n')
+ ret = main(('--fix=lf', path.strpath))
- assert ret == expected_retval
- assert path.read_binary() == output
+ assert ret == 1
+ assert path.read_binary() == b'foo\nbar\nbaz\n'
-# Input, expected return value, expected output
-TESTS_FIX_FORCE_CRLF = (
- # only 'LF'
- (b'foo\nbar\nbaz\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
- # only 'CRLF'
- (b'foo\r\nbar\r\nbaz\r\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
- # only 'CR'
- (b'foo\rbar\rbaz\r', 1, b'foo\r\nbar\r\nbaz\r\n'),
- # mixed with majority of 'LF'
- (b'foo\r\nbar\nbaz\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
- # mixed with majority of 'CRLF'
- (b'foo\r\nbar\nbaz\r\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
- # mixed with majority of 'CR'
- (b'foo\rbar\nbaz\r', 1, b'foo\r\nbar\r\nbaz\r\n'),
- # mixed with as much 'LF' as 'CR'
- (b'foo\rbar\nbaz', 1, b'foo\r\nbar\r\nbaz'),
- # mixed with as much 'CRLF' as 'CR'
- (b'foo\r\nbar\nbaz', 1, b'foo\r\nbar\r\nbaz'),
- # mixed with as much 'CRLF' as 'LF' as 'CR'
- (b'foo\r\nbar\nbaz\r', 1, b'foo\r\nbar\r\nbaz\r\n'),
-)
+def test_fix_crlf(tmpdir):
+ path = tmpdir.join('input.txt')
+ path.write_binary(b'foo\r\nbar\rbaz\n')
+ ret = main(('--fix=crlf', path.strpath))
-
-@pytest.mark.parametrize(
- ('input_s', 'expected_retval', 'output'),
- TESTS_FIX_FORCE_CRLF,
-)
-def test_mixed_line_ending_fix_force_crlf(
- input_s, expected_retval, output,
- tmpdir,
-):
- path = tmpdir.join('file.txt')
- path.write_binary(input_s)
- ret = mixed_line_ending(('--fix=crlf', path.strpath))
-
- assert ret == expected_retval
- assert path.read_binary() == output
+ assert ret == 1
+ assert path.read_binary() == b'foo\r\nbar\r\nbaz\r\n'