Merge branch 'master' into file_contents_sorter_hook

commit: d188f2ecf80c23e906fd510e869c1e59f27cdad8 [log] [tgz]
author: Daniel Gallagher <daniellg@yelp.com> Sun Jun 25 09:39:21 2017 -0700
committer: Daniel Gallagher <daniellg@yelp.com> Sun Jun 25 09:39:21 2017 -0700
tree: fecdff532637df626f259f8b1cc6b18d0dac96a6
parent: 05d9c8c8051446dc14aec45a859f39ae22c01bdb [diff]
parent: d419bef35cd5870e637e0af93cb0df63e3520e07 [diff]
diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
index eea7bed..c681c45 100644
--- a/.pre-commit-hooks.yaml
+++ b/.pre-commit-hooks.yaml

@@ -153,6 +153,12 @@
     entry: requirements-txt-fixer
     language: python
     files: requirements.*\.txt$
+-   id: sort-simple-yaml
+    name: Sort simple YAML files
+    language: python
+    entry: sort-simple-yaml
+    description: Sorts simple YAML files which consist only of top-level keys, preserving comments and blocks.
+    files: '^$'
 -   id: trailing-whitespace
     name: Trim Trailing Whitespace
     description: This hook trims trailing whitespace.

diff --git a/README.md b/README.md
index 894bd83..92fb408 100644
--- a/README.md
+++ b/README.md

@@ -68,6 +68,7 @@
     - `--no-sort-keys` - when autofixing, retain the original key ordering (instead of sorting the keys)
     - `--top-keys comma,separated,keys` - Keys to keep at the top of mappings.
 - `requirements-txt-fixer` - Sorts entries in requirements.txt
+- `sort-simple-yaml` - Sorts simple YAML files which consist only of top-level keys, preserving comments and blocks.
 - `trailing-whitespace` - Trims trailing whitespace.
     - Markdown linebreak trailing spaces preserved for `.md` and`.markdown`;
       use `args: ['--markdown-linebreak-ext=txt,text']` to add other extensions,

diff --git a/hooks.yaml b/hooks.yaml
index eea7bed..c681c45 100644
--- a/hooks.yaml
+++ b/hooks.yaml

@@ -153,6 +153,12 @@
     entry: requirements-txt-fixer
     language: python
     files: requirements.*\.txt$
+-   id: sort-simple-yaml
+    name: Sort simple YAML files
+    language: python
+    entry: sort-simple-yaml
+    description: Sorts simple YAML files which consist only of top-level keys, preserving comments and blocks.
+    files: '^$'
 -   id: trailing-whitespace
     name: Trim Trailing Whitespace
     description: This hook trims trailing whitespace.

diff --git a/pre_commit_hooks/requirements_txt_fixer.py b/pre_commit_hooks/requirements_txt_fixer.py
index efa1906..41e1ffc 100644
--- a/pre_commit_hooks/requirements_txt_fixer.py
+++ b/pre_commit_hooks/requirements_txt_fixer.py

@@ -30,21 +30,25 @@
 
 def fix_requirements(f):
     requirements = []
-    before = []
+    before = list(f)
     after = []
 
-    for line in f:
-        before.append(line)
+    before_string = b''.join(before)
 
-        # If the most recent requirement object has a value, then it's time to
-        # start building the next requirement object.
+    # If the file is empty (i.e. only whitespace/newlines) exit early
+    if before_string.strip() == b'':
+        return 0
+
+    for line in before:
+        # If the most recent requirement object has a value, then it's
+        # time to start building the next requirement object.
         if not len(requirements) or requirements[-1].value is not None:
             requirements.append(Requirement())
 
         requirement = requirements[-1]
 
-        # If we see a newline before any requirements, then this is a top of
-        # file comment.
+        # If we see a newline before any requirements, then this is a
+        # top of file comment.
         if len(requirements) == 1 and line.strip() == b'':
             if len(requirement.comments) and requirement.comments[0].startswith(b'#'):
                 requirement.value = b'\n'
@@ -60,7 +64,6 @@
             after.append(comment)
         after.append(requirement.value)
 
-    before_string = b''.join(before)
     after_string = b''.join(after)
 
     if before_string == after_string:

diff --git a/pre_commit_hooks/sort_simple_yaml.py b/pre_commit_hooks/sort_simple_yaml.py
new file mode 100755
index 0000000..7afae91
--- /dev/null
+++ b/pre_commit_hooks/sort_simple_yaml.py

@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+"""Sort a simple YAML file, keeping blocks of comments and definitions
+together.
+
+We assume a strict subset of YAML that looks like:
+
+    # block of header comments
+    # here that should always
+    # be at the top of the file
+
+    # optional comments
+    # can go here
+    key: value
+    key: value
+
+    key: value
+
+In other words, we don't sort deeper than the top layer, and might corrupt
+complicated YAML files.
+"""
+from __future__ import print_function
+
+import argparse
+
+
+QUOTES = ["'", '"']
+
+
+def sort(lines):
+    """Sort a YAML file in alphabetical order, keeping blocks together.
+
+    :param lines: array of strings (without newlines)
+    :return: sorted array of strings
+    """
+    # make a copy of lines since we will clobber it
+    lines = list(lines)
+    new_lines = parse_block(lines, header=True)
+
+    for block in sorted(parse_blocks(lines), key=first_key):
+        if new_lines:
+            new_lines.append('')
+        new_lines.extend(block)
+
+    return new_lines
+
+
+def parse_block(lines, header=False):
+    """Parse and return a single block, popping off the start of `lines`.
+
+    If parsing a header block, we stop before the first line that is empty or
+    not a comment. Otherwise, we stop before the first empty line.
+
+    :param lines: list of lines
+    :param header: whether we are parsing a header block
+    :return: list of lines that form the single block
+    """
+    block_lines = []
+    while lines and lines[0] and (not header or lines[0].startswith('#')):
+        block_lines.append(lines.pop(0))
+    return block_lines
+
+
+def parse_blocks(lines):
+    """Parse and return all possible blocks, popping off the start of `lines`.
+
+    :param lines: list of lines
+    :return: list of blocks, where each block is a list of lines
+    """
+    blocks = []
+
+    while lines:
+        if lines[0] == '':
+            lines.pop(0)
+        else:
+            blocks.append(parse_block(lines))
+
+    return blocks
+
+
+def first_key(lines):
+    """Returns a string representing the sort key of a block.
+
+    The sort key is the first non-comment line with one leading quote
+    character (if any) removed, or None if every line is a comment.
+
+    >>> print(test)
+    # some comment
+    'foo': true
+    >>> first_key(test)
+    "foo': true"
+    """
+    for line in lines:
+        if line.startswith('#'):
+            continue
+        if any(line.startswith(quote) for quote in QUOTES):
+            return line[1:]
+        return line
+
+
+def main(argv=None):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
+    args = parser.parse_args(argv)
+
+    retval = 0
+
+    for filename in args.filenames:
+        with open(filename, 'r+') as f:
+            lines = [line.rstrip() for line in f.readlines()]
+            new_lines = sort(lines)
+
+            if lines != new_lines:
+                print("Fixing file `{filename}`".format(filename=filename))
+                f.seek(0)
+                f.write("\n".join(new_lines) + "\n")
+                f.truncate()
+                retval = 1
+
+    return retval
+
+
+if __name__ == '__main__':
+    exit(main())

diff --git a/setup.py b/setup.py
index 3f761f6..c5cceb7 100644
--- a/setup.py
+++ b/setup.py

@@ -56,6 +56,7 @@
             'no-commit-to-branch = pre_commit_hooks.no_commit_to_branch:main',
             'pretty-format-json = pre_commit_hooks.pretty_format_json:pretty_format_json',
             'requirements-txt-fixer = pre_commit_hooks.requirements_txt_fixer:fix_requirements_txt',
+            'sort-simple-yaml = pre_commit_hooks.sort_simple_yaml:main',
             'trailing-whitespace-fixer = pre_commit_hooks.trailing_whitespace_fixer:fix_trailing_whitespace',
         ],
     },

diff --git a/tests/requirements_txt_fixer_test.py b/tests/requirements_txt_fixer_test.py
index 1c590a5..33f6a47 100644
--- a/tests/requirements_txt_fixer_test.py
+++ b/tests/requirements_txt_fixer_test.py

@@ -5,6 +5,8 @@
 
 # Input, expected return value, expected output
 TESTS = (
+    (b'', 0, b''),
+    (b'\n', 0, b'\n'),
     (b'foo\nbar\n', 1, b'bar\nfoo\n'),
     (b'bar\nfoo\n', 0, b'bar\nfoo\n'),
     (b'#comment1\nfoo\n#comment2\nbar\n', 1, b'#comment2\nbar\n#comment1\nfoo\n'),

diff --git a/tests/sort_simple_yaml_test.py b/tests/sort_simple_yaml_test.py
new file mode 100644
index 0000000..176d12f
--- /dev/null
+++ b/tests/sort_simple_yaml_test.py

@@ -0,0 +1,120 @@
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+import os
+
+import pytest
+
+from pre_commit_hooks.sort_simple_yaml import first_key
+from pre_commit_hooks.sort_simple_yaml import main
+from pre_commit_hooks.sort_simple_yaml import parse_block
+from pre_commit_hooks.sort_simple_yaml import parse_blocks
+from pre_commit_hooks.sort_simple_yaml import sort
+
+RETVAL_GOOD = 0
+RETVAL_BAD = 1
+TEST_SORTS = [
+    (
+        ['c: true', '', 'b: 42', 'a: 19'],
+        ['b: 42', 'a: 19', '', 'c: true'],
+        RETVAL_BAD,
+    ),
+
+    (
+        ['# i am', '# a header', '', 'c: true', '', 'b: 42', 'a: 19'],
+        ['# i am', '# a header', '', 'b: 42', 'a: 19', '', 'c: true'],
+        RETVAL_BAD,
+    ),
+
+    (
+        ['# i am', '# a header', '', 'already: sorted', '', 'yup: i am'],
+        ['# i am', '# a header', '', 'already: sorted', '', 'yup: i am'],
+        RETVAL_GOOD,
+    ),
+
+    (
+        ['# i am', '# a header'],
+        ['# i am', '# a header'],
+        RETVAL_GOOD,
+    ),
+]
+
+
+@pytest.mark.parametrize('bad_lines,good_lines,retval', TEST_SORTS)
+def test_integration_good_bad_lines(tmpdir, bad_lines, good_lines, retval):
+    file_path = os.path.join(tmpdir.strpath, 'foo.yaml')
+
+    with open(file_path, 'w') as f:
+        f.write("\n".join(bad_lines) + "\n")
+
+    assert main([file_path]) == retval
+
+    with open(file_path, 'r') as f:
+        assert [line.rstrip() for line in f.readlines()] == good_lines
+
+
+def test_parse_header():
+    lines = ['# some header', '# is here', '', 'this is not a header']
+    assert parse_block(lines, header=True) == ['# some header', '# is here']
+    assert lines == ['', 'this is not a header']
+
+    lines = ['this is not a header']
+    assert parse_block(lines, header=True) == []
+    assert lines == ['this is not a header']
+
+
+def test_parse_block():
+    # a normal block
+    lines = ['a: 42', 'b: 17', '', 'c: 19']
+    assert parse_block(lines) == ['a: 42', 'b: 17']
+    assert lines == ['', 'c: 19']
+
+    # a block at the end
+    lines = ['c: 19']
+    assert parse_block(lines) == ['c: 19']
+    assert lines == []
+
+    # no block
+    lines = []
+    assert parse_block(lines) == []
+    assert lines == []
+
+
+def test_parse_blocks():
+    # normal blocks
+    lines = ['a: 42', 'b: 17', '', 'c: 19']
+    assert parse_blocks(lines) == [['a: 42', 'b: 17'], ['c: 19']]
+    assert lines == []
+
+    # a single block
+    lines = ['a: 42', 'b: 17']
+    assert parse_blocks(lines) == [['a: 42', 'b: 17']]
+    assert lines == []
+
+    # no blocks
+    lines = []
+    assert parse_blocks(lines) == []
+    assert lines == []
+
+
+def test_first_key():
+    # first line
+    lines = ['a: 42', 'b: 17', '', 'c: 19']
+    assert first_key(lines) == 'a: 42'
+
+    # second line
+    lines = ['# some comment', 'a: 42', 'b: 17', '', 'c: 19']
+    assert first_key(lines) == 'a: 42'
+
+    # second line with quotes
+    lines = ['# some comment', '"a": 42', 'b: 17', '', 'c: 19']
+    assert first_key(lines) == 'a": 42'
+
+    # no lines
+    lines = []
+    assert first_key(lines) is None
+
+
+@pytest.mark.parametrize('bad_lines,good_lines,_', TEST_SORTS)
+def test_sort(bad_lines, good_lines, _):
+    assert sort(bad_lines) == good_lines
commit	d188f2ecf80c23e906fd510e869c1e59f27cdad8	[log] [tgz]
author	Daniel Gallagher <daniellg@yelp.com>	Sun Jun 25 09:39:21 2017 -0700
committer	Daniel Gallagher <daniellg@yelp.com>	Sun Jun 25 09:39:21 2017 -0700
tree	fecdff532637df626f259f8b1cc6b18d0dac96a6
parent	05d9c8c8051446dc14aec45a859f39ae22c01bdb [diff]
parent	d419bef35cd5870e637e0af93cb0df63e3520e07 [diff]