Merge branch 'master' into file_contents_sorter_hook
diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index eea7bed..c681c45 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml
@@ -153,6 +153,12 @@ entry: requirements-txt-fixer language: python files: requirements.*\.txt$ +- id: sort-simple-yaml + name: Sort simple YAML files + language: python + entry: sort-simple-yaml + description: Sorts simple YAML files which consist only of top-level keys, preserving comments and blocks. + files: '^$' - id: trailing-whitespace name: Trim Trailing Whitespace description: This hook trims trailing whitespace.
diff --git a/README.md b/README.md index 894bd83..92fb408 100644 --- a/README.md +++ b/README.md
@@ -68,6 +68,7 @@ - `--no-sort-keys` - when autofixing, retain the original key ordering (instead of sorting the keys) - `--top-keys comma,separated,keys` - Keys to keep at the top of mappings. - `requirements-txt-fixer` - Sorts entries in requirements.txt +- `sort-simple-yaml` - Sorts simple YAML files which consist only of top-level keys, preserving comments and blocks. - `trailing-whitespace` - Trims trailing whitespace. - Markdown linebreak trailing spaces preserved for `.md` and`.markdown`; use `args: ['--markdown-linebreak-ext=txt,text']` to add other extensions,
diff --git a/hooks.yaml b/hooks.yaml index eea7bed..c681c45 100644 --- a/hooks.yaml +++ b/hooks.yaml
@@ -153,6 +153,12 @@ entry: requirements-txt-fixer language: python files: requirements.*\.txt$ +- id: sort-simple-yaml + name: Sort simple YAML files + language: python + entry: sort-simple-yaml + description: Sorts simple YAML files which consist only of top-level keys, preserving comments and blocks. + files: '^$' - id: trailing-whitespace name: Trim Trailing Whitespace description: This hook trims trailing whitespace.
diff --git a/pre_commit_hooks/requirements_txt_fixer.py b/pre_commit_hooks/requirements_txt_fixer.py index efa1906..41e1ffc 100644 --- a/pre_commit_hooks/requirements_txt_fixer.py +++ b/pre_commit_hooks/requirements_txt_fixer.py
@@ -30,21 +30,25 @@ def fix_requirements(f): requirements = [] - before = [] + before = list(f) after = [] - for line in f: - before.append(line) + before_string = b''.join(before) - # If the most recent requirement object has a value, then it's time to - # start building the next requirement object. + # If the file is empty (i.e. only whitespace/newlines) exit early + if before_string.strip() == b'': + return 0 + + for line in before: + # If the most recent requirement object has a value, then it's + # time to start building the next requirement object. if not len(requirements) or requirements[-1].value is not None: requirements.append(Requirement()) requirement = requirements[-1] - # If we see a newline before any requirements, then this is a top of - # file comment. + # If we see a newline before any requirements, then this is a + # top of file comment. if len(requirements) == 1 and line.strip() == b'': if len(requirement.comments) and requirement.comments[0].startswith(b'#'): requirement.value = b'\n' @@ -60,7 +64,6 @@ after.append(comment) after.append(requirement.value) - before_string = b''.join(before) after_string = b''.join(after) if before_string == after_string:
diff --git a/pre_commit_hooks/sort_simple_yaml.py b/pre_commit_hooks/sort_simple_yaml.py new file mode 100755 index 0000000..7afae91 --- /dev/null +++ b/pre_commit_hooks/sort_simple_yaml.py
@@ -0,0 +1,123 @@ +#!/usr/bin/env python +"""Sort a simple YAML file, keeping blocks of comments and definitions +together. + +We assume a strict subset of YAML that looks like: + + # block of header comments + # here that should always + # be at the top of the file + + # optional comments + # can go here + key: value + key: value + + key: value + +In other words, we don't sort deeper than the top layer, and might corrupt +complicated YAML files. +""" +from __future__ import print_function + +import argparse + + +QUOTES = ["'", '"'] + + +def sort(lines): + """Sort a YAML file in alphabetical order, keeping blocks together. + + :param lines: array of strings (without newlines) + :return: sorted array of strings + """ + # make a copy of lines since we will clobber it + lines = list(lines) + new_lines = parse_block(lines, header=True) + + for block in sorted(parse_blocks(lines), key=first_key): + if new_lines: + new_lines.append('') + new_lines.extend(block) + + return new_lines + + +def parse_block(lines, header=False): + """Parse and return a single block, popping off the start of `lines`. + + If parsing a header block, we stop after we reach a line that is not a + comment. Otherwise, we stop after reaching an empty line. + + :param lines: list of lines + :param header: whether we are parsing a header block + :return: list of lines that form the single block + """ + block_lines = [] + while lines and lines[0] and (not header or lines[0].startswith('#')): + block_lines.append(lines.pop(0)) + return block_lines + + +def parse_blocks(lines): + """Parse and return all possible blocks, popping off the start of `lines`. + + :param lines: list of lines + :return: list of blocks, where each block is a list of lines + """ + blocks = [] + + while lines: + if lines[0] == '': + lines.pop(0) + else: + blocks.append(parse_block(lines)) + + return blocks + + +def first_key(lines): + """Returns a string representing the sort key of a block. + + The sort key is the first YAML key we encounter, ignoring comments, and + stripping leading quotes. + + >>> print(test) + # some comment + 'foo': true + >>> first_key(test) + 'foo' + """ + for line in lines: + if line.startswith('#'): + continue + if any(line.startswith(quote) for quote in QUOTES): + return line[1:] + return line + + +def main(argv=None): + parser = argparse.ArgumentParser() + parser.add_argument('filenames', nargs='*', help='Filenames to fix') + args = parser.parse_args(argv) + + retval = 0 + + for filename in args.filenames: + with open(filename, 'r+') as f: + lines = [line.rstrip() for line in f.readlines()] + new_lines = sort(lines) + + if lines != new_lines: + print("Fixing file `{filename}`".format(filename=filename)) + f.seek(0) + f.write("\n".join(new_lines) + "\n") + f.truncate() + retval = 1 + + return retval + + +if __name__ == '__main__': + exit(main())
diff --git a/setup.py b/setup.py index 3f761f6..c5cceb7 100644 --- a/setup.py +++ b/setup.py
@@ -56,6 +56,7 @@ 'no-commit-to-branch = pre_commit_hooks.no_commit_to_branch:main', 'pretty-format-json = pre_commit_hooks.pretty_format_json:pretty_format_json', 'requirements-txt-fixer = pre_commit_hooks.requirements_txt_fixer:fix_requirements_txt', + 'sort-simple-yaml = pre_commit_hooks.sort_simple_yaml:main', 'trailing-whitespace-fixer = pre_commit_hooks.trailing_whitespace_fixer:fix_trailing_whitespace', ], },
diff --git a/tests/requirements_txt_fixer_test.py b/tests/requirements_txt_fixer_test.py index 1c590a5..33f6a47 100644 --- a/tests/requirements_txt_fixer_test.py +++ b/tests/requirements_txt_fixer_test.py
@@ -5,6 +5,8 @@ # Input, expected return value, expected output TESTS = ( + (b'', 0, b''), + (b'\n', 0, b'\n'), (b'foo\nbar\n', 1, b'bar\nfoo\n'), (b'bar\nfoo\n', 0, b'bar\nfoo\n'), (b'#comment1\nfoo\n#comment2\nbar\n', 1, b'#comment2\nbar\n#comment1\nfoo\n'),
diff --git a/tests/sort_simple_yaml_test.py b/tests/sort_simple_yaml_test.py new file mode 100644 index 0000000..176d12f --- /dev/null +++ b/tests/sort_simple_yaml_test.py
@@ -0,0 +1,120 @@ +from __future__ import absolute_import +from __future__ import unicode_literals + +import os + +import pytest + +from pre_commit_hooks.sort_simple_yaml import first_key +from pre_commit_hooks.sort_simple_yaml import main +from pre_commit_hooks.sort_simple_yaml import parse_block +from pre_commit_hooks.sort_simple_yaml import parse_blocks +from pre_commit_hooks.sort_simple_yaml import sort + +RETVAL_GOOD = 0 +RETVAL_BAD = 1 +TEST_SORTS = [ + ( + ['c: true', '', 'b: 42', 'a: 19'], + ['b: 42', 'a: 19', '', 'c: true'], + RETVAL_BAD, + ), + + ( + ['# i am', '# a header', '', 'c: true', '', 'b: 42', 'a: 19'], + ['# i am', '# a header', '', 'b: 42', 'a: 19', '', 'c: true'], + RETVAL_BAD, + ), + + ( + ['# i am', '# a header', '', 'already: sorted', '', 'yup: i am'], + ['# i am', '# a header', '', 'already: sorted', '', 'yup: i am'], + RETVAL_GOOD, + ), + + ( + ['# i am', '# a header'], + ['# i am', '# a header'], + RETVAL_GOOD, + ), +] + + +@pytest.mark.parametrize('bad_lines,good_lines,retval', TEST_SORTS) +def test_integration_good_bad_lines(tmpdir, bad_lines, good_lines, retval): + file_path = os.path.join(tmpdir.strpath, 'foo.yaml') + + with open(file_path, 'w') as f: + f.write("\n".join(bad_lines) + "\n") + + assert main([file_path]) == retval + + with open(file_path, 'r') as f: + assert [line.rstrip() for line in f.readlines()] == good_lines + + +def test_parse_header(): + lines = ['# some header', '# is here', '', 'this is not a header'] + assert parse_block(lines, header=True) == ['# some header', '# is here'] + assert lines == ['', 'this is not a header'] + + lines = ['this is not a header'] + assert parse_block(lines, header=True) == [] + assert lines == ['this is not a header'] + + +def test_parse_block(): + # a normal block + lines = ['a: 42', 'b: 17', '', 'c: 19'] + assert parse_block(lines) == ['a: 42', 'b: 17'] + assert lines == ['', 'c: 19'] + + # a block at the end + lines = ['c: 19'] + assert parse_block(lines) == ['c: 19'] + assert lines == [] + + # no block + lines = [] + assert parse_block(lines) == [] + assert lines == [] + + +def test_parse_blocks(): + # normal blocks + lines = ['a: 42', 'b: 17', '', 'c: 19'] + assert parse_blocks(lines) == [['a: 42', 'b: 17'], ['c: 19']] + assert lines == [] + + # a single block + lines = ['a: 42', 'b: 17'] + assert parse_blocks(lines) == [['a: 42', 'b: 17']] + assert lines == [] + + # no blocks + lines = [] + assert parse_blocks(lines) == [] + assert lines == [] + + +def test_first_key(): + # first line + lines = ['a: 42', 'b: 17', '', 'c: 19'] + assert first_key(lines) == 'a: 42' + + # second line + lines = ['# some comment', 'a: 42', 'b: 17', '', 'c: 19'] + assert first_key(lines) == 'a: 42' + + # second line with quotes + lines = ['# some comment', '"a": 42', 'b: 17', '', 'c: 19'] + assert first_key(lines) == 'a": 42' + + # no lines + lines = [] + assert first_key(lines) is None + + +@pytest.mark.parametrize('bad_lines,good_lines,_', TEST_SORTS) +def test_sort(bad_lines, good_lines, _): + assert sort(bad_lines) == good_lines