Merge branch 'master' into file_contents_sorter_hook
diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
index eea7bed..c681c45 100644
--- a/.pre-commit-hooks.yaml
+++ b/.pre-commit-hooks.yaml
@@ -153,6 +153,12 @@
entry: requirements-txt-fixer
language: python
files: requirements.*\.txt$
+- id: sort-simple-yaml
+ name: Sort simple YAML files
+ language: python
+ entry: sort-simple-yaml
+ description: Sorts simple YAML files which consist only of top-level keys, preserving comments and blocks.
+ files: '^$'
- id: trailing-whitespace
name: Trim Trailing Whitespace
description: This hook trims trailing whitespace.
diff --git a/README.md b/README.md
index 894bd83..92fb408 100644
--- a/README.md
+++ b/README.md
@@ -68,6 +68,7 @@
- `--no-sort-keys` - when autofixing, retain the original key ordering (instead of sorting the keys)
- `--top-keys comma,separated,keys` - Keys to keep at the top of mappings.
- `requirements-txt-fixer` - Sorts entries in requirements.txt
+- `sort-simple-yaml` - Sorts simple YAML files which consist only of top-level keys, preserving comments and blocks.
- `trailing-whitespace` - Trims trailing whitespace.
- Markdown linebreak trailing spaces preserved for `.md` and`.markdown`;
use `args: ['--markdown-linebreak-ext=txt,text']` to add other extensions,
diff --git a/hooks.yaml b/hooks.yaml
index eea7bed..c681c45 100644
--- a/hooks.yaml
+++ b/hooks.yaml
@@ -153,6 +153,12 @@
entry: requirements-txt-fixer
language: python
files: requirements.*\.txt$
+- id: sort-simple-yaml
+ name: Sort simple YAML files
+ language: python
+ entry: sort-simple-yaml
+ description: Sorts simple YAML files which consist only of top-level keys, preserving comments and blocks.
+ files: '^$'
- id: trailing-whitespace
name: Trim Trailing Whitespace
description: This hook trims trailing whitespace.
diff --git a/pre_commit_hooks/requirements_txt_fixer.py b/pre_commit_hooks/requirements_txt_fixer.py
index efa1906..41e1ffc 100644
--- a/pre_commit_hooks/requirements_txt_fixer.py
+++ b/pre_commit_hooks/requirements_txt_fixer.py
@@ -30,21 +30,25 @@
def fix_requirements(f):
requirements = []
- before = []
+ before = list(f)
after = []
- for line in f:
- before.append(line)
+ before_string = b''.join(before)
- # If the most recent requirement object has a value, then it's time to
- # start building the next requirement object.
+ # If the file is empty (i.e. only whitespace/newlines) exit early
+ if before_string.strip() == b'':
+ return 0
+
+ for line in before:
+ # If the most recent requirement object has a value, then it's
+ # time to start building the next requirement object.
if not len(requirements) or requirements[-1].value is not None:
requirements.append(Requirement())
requirement = requirements[-1]
- # If we see a newline before any requirements, then this is a top of
- # file comment.
+ # If we see a newline before any requirements, then this is a
+ # top of file comment.
if len(requirements) == 1 and line.strip() == b'':
if len(requirement.comments) and requirement.comments[0].startswith(b'#'):
requirement.value = b'\n'
@@ -60,7 +64,6 @@
after.append(comment)
after.append(requirement.value)
- before_string = b''.join(before)
after_string = b''.join(after)
if before_string == after_string:
diff --git a/pre_commit_hooks/sort_simple_yaml.py b/pre_commit_hooks/sort_simple_yaml.py
new file mode 100755
index 0000000..7afae91
--- /dev/null
+++ b/pre_commit_hooks/sort_simple_yaml.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+"""Sort a simple YAML file, keeping blocks of comments and definitions
+together.
+
+We assume a strict subset of YAML that looks like:
+
+ # block of header comments
+ # here that should always
+ # be at the top of the file
+
+ # optional comments
+ # can go here
+ key: value
+ key: value
+
+ key: value
+
+In other words, we don't sort deeper than the top layer, and might corrupt
+complicated YAML files.
+"""
+from __future__ import print_function
+
+import argparse
+
+
+QUOTES = ["'", '"']
+
+
+def sort(lines):
+ """Reorder the blocks of a simple YAML file by their first key (see first_key).
+
+ :param lines: list of strings (without newlines); the caller's list is left untouched
+ :return: new list: leading comment header (if any), then the sorted blocks, joined by single '' lines
+ """
+ # work on a copy: parse_block/parse_blocks consume their argument via pop(0)
+ lines = list(lines)
+ new_lines = parse_block(lines, header=True)  # leading '#' lines stay on top
+
+ for block in sorted(parse_blocks(lines), key=first_key):
+ if new_lines:  # separator goes between pieces only, never before the first one
+ new_lines.append('')
+ new_lines.extend(block)
+
+ return new_lines
+
+
+def parse_block(lines, header=False):
+ """Pop a single block off the start of `lines` and return it.
+
+ A block ends just before the first empty line or at the end of `lines`; a
+ header block additionally ends just before the first line not starting with '#'.
+
+ :param lines: list of lines, mutated in place (the block's lines are removed)
+ :param header: whether we are parsing a header block
+ :return: list of lines that form the block; the terminating line is not consumed
+ """
+ block_lines = []
+ while lines and lines[0] and (not header or lines[0].startswith('#')):
+ block_lines.append(lines.pop(0))
+ return block_lines
+
+
+def parse_blocks(lines):
+ """Split everything in `lines` into blocks, consuming `lines` until it is empty.
+
+ :param lines: list of lines, emptied in place
+ :return: list of blocks in file order, where each block is a non-empty list of lines
+ """
+ blocks = []
+
+ while lines:
+ if lines[0] == '':
+ lines.pop(0)  # drop blank separator lines between blocks
+ else:
+ blocks.append(parse_block(lines))
+
+ return blocks
+
+
+def first_key(lines):
+ """Return the sort key of a block (a list of lines).
+
+ The key is the first line that does not start with '#', with one leading
+ quote character removed if present (the rest of the line is kept as-is).
+ Falls through and returns None when every line is a comment or `lines` is
+ empty. NOTE(review): None keys cannot be ordered against str keys on Python 3.
+
+ >>> test = ['# some comment', "'foo': true"]
+ >>> first_key(test)
+ "foo': true"
+ """
+ for line in lines:
+ if line.startswith('#'):
+ continue
+ if any(line.startswith(quote) for quote in QUOTES):
+ return line[1:]
+ return line
+
+
+def main(argv=None):  # CLI entry point: sorts each given file in place; returns 1 if any file was rewritten, else 0
+ parser = argparse.ArgumentParser()
+ parser.add_argument('filenames', nargs='*', help='Filenames to fix')
+ args = parser.parse_args(argv)
+
+ retval = 0
+
+ for filename in args.filenames:
+ with open(filename, 'r+') as f:  # 'r+': read, then rewrite through the same handle
+ lines = [line.rstrip() for line in f.readlines()]  # trailing whitespace is dropped before comparing
+ new_lines = sort(lines)
+
+ if lines != new_lines:
+ print("Fixing file `{filename}`".format(filename=filename))
+ f.seek(0)  # overwrite from the beginning of the file
+ f.write("\n".join(new_lines) + "\n")
+ f.truncate()  # cut off leftover bytes if the new content is shorter
+ retval = 1
+
+ return retval
+
+
+if __name__ == '__main__':
+ exit(main())  # main()'s return value becomes the process exit status
diff --git a/setup.py b/setup.py
index 3f761f6..c5cceb7 100644
--- a/setup.py
+++ b/setup.py
@@ -56,6 +56,7 @@
'no-commit-to-branch = pre_commit_hooks.no_commit_to_branch:main',
'pretty-format-json = pre_commit_hooks.pretty_format_json:pretty_format_json',
'requirements-txt-fixer = pre_commit_hooks.requirements_txt_fixer:fix_requirements_txt',
+ 'sort-simple-yaml = pre_commit_hooks.sort_simple_yaml:main',
'trailing-whitespace-fixer = pre_commit_hooks.trailing_whitespace_fixer:fix_trailing_whitespace',
],
},
diff --git a/tests/requirements_txt_fixer_test.py b/tests/requirements_txt_fixer_test.py
index 1c590a5..33f6a47 100644
--- a/tests/requirements_txt_fixer_test.py
+++ b/tests/requirements_txt_fixer_test.py
@@ -5,6 +5,8 @@
# Input, expected return value, expected output
TESTS = (
+ (b'', 0, b''),
+ (b'\n', 0, b'\n'),
(b'foo\nbar\n', 1, b'bar\nfoo\n'),
(b'bar\nfoo\n', 0, b'bar\nfoo\n'),
(b'#comment1\nfoo\n#comment2\nbar\n', 1, b'#comment2\nbar\n#comment1\nfoo\n'),
diff --git a/tests/sort_simple_yaml_test.py b/tests/sort_simple_yaml_test.py
new file mode 100644
index 0000000..176d12f
--- /dev/null
+++ b/tests/sort_simple_yaml_test.py
@@ -0,0 +1,120 @@
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+import os
+
+import pytest
+
+from pre_commit_hooks.sort_simple_yaml import first_key
+from pre_commit_hooks.sort_simple_yaml import main
+from pre_commit_hooks.sort_simple_yaml import parse_block
+from pre_commit_hooks.sort_simple_yaml import parse_blocks
+from pre_commit_hooks.sort_simple_yaml import sort
+
+RETVAL_GOOD = 0  # expected main() result when the file is left unchanged
+RETVAL_BAD = 1  # expected main() result when the file is rewritten
+TEST_SORTS = [  # each case: (input lines, expected sorted lines, expected main() return value)
+ (  # no header; lines inside a block keep their order, only whole blocks move
+ ['c: true', '', 'b: 42', 'a: 19'],
+ ['b: 42', 'a: 19', '', 'c: true'],
+ RETVAL_BAD,
+ ),
+
+ (  # same blocks, with a comment header that must stay on top
+ ['# i am', '# a header', '', 'c: true', '', 'b: 42', 'a: 19'],
+ ['# i am', '# a header', '', 'b: 42', 'a: 19', '', 'c: true'],
+ RETVAL_BAD,
+ ),
+
+ (  # header plus blocks that are already in order
+ ['# i am', '# a header', '', 'already: sorted', '', 'yup: i am'],
+ ['# i am', '# a header', '', 'already: sorted', '', 'yup: i am'],
+ RETVAL_GOOD,
+ ),
+
+ (  # header only, no blocks at all
+ ['# i am', '# a header'],
+ ['# i am', '# a header'],
+ RETVAL_GOOD,
+ ),
+]
+
+
+@pytest.mark.parametrize('bad_lines,good_lines,retval', TEST_SORTS)
+def test_integration_good_bad_lines(tmpdir, bad_lines, good_lines, retval):  # end-to-end: run main() on a real file
+ file_path = os.path.join(tmpdir.strpath, 'foo.yaml')
+
+ with open(file_path, 'w') as f:
+ f.write("\n".join(bad_lines) + "\n")
+
+ assert main([file_path]) == retval  # 1 if the file had to be rewritten, else 0
+
+ with open(file_path, 'r') as f:  # re-read to check what main() left on disk
+ assert [line.rstrip() for line in f.readlines()] == good_lines
+
+
+def test_parse_header():  # header mode consumes only the leading '#' lines
+ lines = ['# some header', '# is here', '', 'this is not a header']
+ assert parse_block(lines, header=True) == ['# some header', '# is here']
+ assert lines == ['', 'this is not a header']  # the terminating blank line is left in place
+
+ lines = ['this is not a header']
+ assert parse_block(lines, header=True) == []  # first line is not a comment: empty header
+ assert lines == ['this is not a header']
+
+
+def test_parse_block():
+ # a normal block: parsing stops before the blank line, which stays in `lines`
+ lines = ['a: 42', 'b: 17', '', 'c: 19']
+ assert parse_block(lines) == ['a: 42', 'b: 17']
+ assert lines == ['', 'c: 19']
+
+ # a block at the end: the input is consumed completely
+ lines = ['c: 19']
+ assert parse_block(lines) == ['c: 19']
+ assert lines == []
+
+ # no block: empty input yields an empty block
+ lines = []
+ assert parse_block(lines) == []
+ assert lines == []
+
+
+def test_parse_blocks():
+ # normal blocks: split on the blank line; the input list ends up empty
+ lines = ['a: 42', 'b: 17', '', 'c: 19']
+ assert parse_blocks(lines) == [['a: 42', 'b: 17'], ['c: 19']]
+ assert lines == []
+
+ # a single block: no blank line, so everything lands in one block
+ lines = ['a: 42', 'b: 17']
+ assert parse_blocks(lines) == [['a: 42', 'b: 17']]
+ assert lines == []
+
+ # no blocks: empty input yields an empty list
+ lines = []
+ assert parse_blocks(lines) == []
+ assert lines == []
+
+
+def test_first_key():
+ # first line: the key is the whole line, not just the part before ':'
+ lines = ['a: 42', 'b: 17', '', 'c: 19']
+ assert first_key(lines) == 'a: 42'
+
+ # second line: leading comment lines are skipped
+ lines = ['# some comment', 'a: 42', 'b: 17', '', 'c: 19']
+ assert first_key(lines) == 'a: 42'
+
+ # second line with quotes: only the single leading quote character is removed
+ lines = ['# some comment', '"a": 42', 'b: 17', '', 'c: 19']
+ assert first_key(lines) == 'a": 42'
+
+ # no lines: the loop never returns, so the result is None
+ lines = []
+ assert first_key(lines) is None
+
+
+@pytest.mark.parametrize('bad_lines,good_lines,_', TEST_SORTS)
+def test_sort(bad_lines, good_lines, _):  # same cases as the integration test, without file I/O; retval is unused
+ assert sort(bad_lines) == good_lines