Add sort-simple-yaml hook (originally private hook from yelp_pre_commit_hooks)
diff --git a/.gitignore b/.gitignore
index 2626934..6fdf044 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 *.iml
 *.py[co]
 .*.sw[a-z]
+.cache
 .coverage
 .idea
 .project
diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
index bda3f76..e7f433b 100644
--- a/.pre-commit-hooks.yaml
+++ b/.pre-commit-hooks.yaml
@@ -147,6 +147,12 @@
     entry: requirements-txt-fixer
     language: python
     files: requirements.*\.txt$
+-   id: sort-simple-yaml
+    name: Sort simple YAML files
+    language: python
+    entry: sort-simple-yaml
+    description: Sorts simple YAML files which consist only of top-level keys, preserving comments and blocks.
+    files: '^$'
 -   id: trailing-whitespace
     name: Trim Trailing Whitespace
     description: This hook trims trailing whitespace.
diff --git a/README.md b/README.md
index 3b62234..8db7eef 100644
--- a/README.md
+++ b/README.md
@@ -67,6 +67,7 @@
     - `--no-sort-keys` - when autofixing, retain the original key ordering (instead of sorting the keys)
     - `--top-keys comma,separated,keys` - Keys to keep at the top of mappings.
 - `requirements-txt-fixer` - Sorts entries in requirements.txt
+- `sort-simple-yaml` - Sorts simple YAML files which consist only of top-level keys, preserving comments and blocks.
 - `trailing-whitespace` - Trims trailing whitespace.
     - Markdown linebreak trailing spaces preserved for `.md` and`.markdown`;
       use `args: ['--markdown-linebreak-ext=txt,text']` to add other extensions,
diff --git a/hooks.yaml b/hooks.yaml
index bda3f76..e7f433b 100644
--- a/hooks.yaml
+++ b/hooks.yaml
@@ -147,6 +147,12 @@
     entry: requirements-txt-fixer
     language: python
     files: requirements.*\.txt$
+-   id: sort-simple-yaml
+    name: Sort simple YAML files
+    language: python
+    entry: sort-simple-yaml
+    description: Sorts simple YAML files which consist only of top-level keys, preserving comments and blocks.
+    files: '^$'
 -   id: trailing-whitespace
     name: Trim Trailing Whitespace
     description: This hook trims trailing whitespace.
diff --git a/pre_commit_hooks/sort_simple_yaml.py b/pre_commit_hooks/sort_simple_yaml.py
new file mode 100755
index 0000000..7afae91
--- /dev/null
+++ b/pre_commit_hooks/sort_simple_yaml.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+"""Sort a simple YAML file, keeping blocks of comments and definitions
+together.
+
+We assume a strict subset of YAML that looks like:
+
+    # block of header comments
+    # here that should always
+    # be at the top of the file
+
+    # optional comments
+    # can go here
+    key: value
+    key: value
+
+    key: value
+
+In other words, we don't sort deeper than the top layer, and might corrupt
+complicated YAML files.
+"""
+from __future__ import print_function
+
+import argparse
+
+
+QUOTES = ["'", '"']  # opening quotes that first_key() drops from sort keys
+
+
+def sort(lines):
+    """Sort a YAML file in alphabetical order, keeping blocks together.
+
+    :param lines: array of strings (without newlines); left unmodified
+    :return: new array: header comments first, then blocks by first key
+    """
+    remaining = list(lines)  # copy: the parse helpers pop from their input
+    new_lines = parse_block(remaining, header=True)
+    # comment-only blocks have no key (None), which py3 cannot order vs. str
+    blocks = sorted(parse_blocks(remaining), key=lambda b: first_key(b) or '')
+    for block in blocks:
+        if new_lines:
+            new_lines.append('')
+        new_lines.extend(block)
+
+    return new_lines
+
+
+def parse_block(lines, header=False):
+    """Pop the leading block (up to the first empty line) off `lines`.
+
+    :param lines: list of lines; the returned prefix is removed in place
+    :param header: if true, also stop at the first non-comment line
+    :return: list of lines that form the single block
+    """
+    block_lines = []
+    while lines:
+        head = lines[0]
+        if not head or (header and not head.startswith('#')):
+            break
+        block_lines.append(lines.pop(0))
+    return block_lines
+
+
+def parse_blocks(lines):
+    """Split `lines` into blocks separated by empty lines, consuming `lines`.
+
+    :param lines: list of lines; emptied in place as blocks are popped off
+    :return: list of blocks, where each block is a list of lines
+    """
+    blocks = []
+
+    while lines:
+        if lines[0] != '':
+            blocks.append(parse_block(lines))
+            continue
+        # an empty separator line belongs to no block: drop it
+        del lines[0]
+    return blocks
+
+
+def first_key(lines):
+    """Returns a string representing the sort key of a block.
+
+    The sort key is the first line that is not a comment, minus a leading
+    quote character if it has one. Only that one character is removed, so
+    a closing quote and the value stay part of the key. Returns None when
+    the block has no such line.
+
+    >>> test = ['# some comment', "'foo': true"]
+    >>> first_key(test)
+    "foo': true"
+    """
+    for line in lines:
+        if line.startswith('#'):
+            continue
+        # drop only the opening quote; the rest of the line is kept verbatim
+        return line[1:] if line.startswith(tuple(QUOTES)) else line
+    return None
+
+
+def main(argv=None):
+    """Sort the given files in place; return 1 if any changed, else 0."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
+    filenames = parser.parse_args(argv).filenames
+    status = 0
+    for path in filenames:
+        # 'r+' lets us read and then overwrite through the same handle
+        with open(path, 'r+') as handle:
+            original = [raw.rstrip() for raw in handle.readlines()]
+            fixed = sort(original)
+            if original == fixed:
+                continue
+            print("Fixing file `{filename}`".format(filename=path))
+            handle.seek(0)
+            handle.write("\n".join(fixed) + "\n")
+            handle.truncate()
+            status = 1
+
+    return status
+
+
+if __name__ == '__main__':
+    raise SystemExit(main())  # `exit` is a site.py helper, absent under -S
diff --git a/setup.py b/setup.py
index 4abb7a2..af21e16 100644
--- a/setup.py
+++ b/setup.py
@@ -55,6 +55,7 @@
             'no-commit-to-branch = pre_commit_hooks.no_commit_to_branch:main',
             'pretty-format-json = pre_commit_hooks.pretty_format_json:pretty_format_json',
             'requirements-txt-fixer = pre_commit_hooks.requirements_txt_fixer:fix_requirements_txt',
+            'sort-simple-yaml = pre_commit_hooks.sort_simple_yaml:main',
             'trailing-whitespace-fixer = pre_commit_hooks.trailing_whitespace_fixer:fix_trailing_whitespace',
         ],
     },
diff --git a/tests/sort_simple_yaml_test.py b/tests/sort_simple_yaml_test.py
new file mode 100644
index 0000000..176d12f
--- /dev/null
+++ b/tests/sort_simple_yaml_test.py
@@ -0,0 +1,120 @@
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+import os
+
+import pytest
+
+from pre_commit_hooks.sort_simple_yaml import first_key
+from pre_commit_hooks.sort_simple_yaml import main
+from pre_commit_hooks.sort_simple_yaml import parse_block
+from pre_commit_hooks.sort_simple_yaml import parse_blocks
+from pre_commit_hooks.sort_simple_yaml import sort
+
+RETVAL_GOOD = 0  # main() result when no file had to be rewritten
+RETVAL_BAD = 1  # main() result when a file was rewritten
+TEST_SORTS = [  # entries: (input lines, expected sorted lines, main() result)
+    (  # blocks are reordered; lines inside a block keep their order
+        ['c: true', '', 'b: 42', 'a: 19'],
+        ['b: 42', 'a: 19', '', 'c: true'],
+        RETVAL_BAD,
+    ),
+    # a leading comment block stays on top as the header
+    (
+        ['# i am', '# a header', '', 'c: true', '', 'b: 42', 'a: 19'],
+        ['# i am', '# a header', '', 'b: 42', 'a: 19', '', 'c: true'],
+        RETVAL_BAD,
+    ),
+    # already sorted: the file is left alone
+    (
+        ['# i am', '# a header', '', 'already: sorted', '', 'yup: i am'],
+        ['# i am', '# a header', '', 'already: sorted', '', 'yup: i am'],
+        RETVAL_GOOD,
+    ),
+    # header only, no blocks at all
+    (
+        ['# i am', '# a header'],
+        ['# i am', '# a header'],
+        RETVAL_GOOD,
+    ),
+]
+
+
+@pytest.mark.parametrize('bad_lines,good_lines,retval', TEST_SORTS)
+def test_integration_good_bad_lines(tmpdir, bad_lines, good_lines, retval):
+    """main() returns `retval` and leaves `good_lines` in the file."""
+    file_path = os.path.join(tmpdir.strpath, 'foo.yaml')
+    with open(file_path, 'w') as out:
+        out.write("\n".join(bad_lines) + "\n")
+
+    assert main([file_path]) == retval
+
+    with open(file_path, 'r') as result:
+        assert [line.rstrip() for line in result.readlines()] == good_lines
+
+
+def test_parse_header():
+    body = ['', 'this is not a header']
+    lines = ['# some header', '# is here'] + body
+    assert parse_block(lines, header=True) == ['# some header', '# is here']
+    assert lines == body  # header popped off, the rest left in place
+    lines = body[1:]  # no leading comment: nothing to pop
+    assert parse_block(lines, header=True) == []
+    assert lines == ['this is not a header']
+
+
+def test_parse_block():
+    """parse_block pops lines up to the first empty line."""
+    cases = [
+        # each case: (input lines, expected block, expected leftover lines)
+        # a normal block
+        (['a: 42', 'b: 17', '', 'c: 19'], ['a: 42', 'b: 17'], ['', 'c: 19']),
+        # a block at the end
+        (['c: 19'], ['c: 19'], []),
+        # no block
+        ([], [], []),
+    ]
+
+    for lines, block, leftover in cases:
+        assert parse_block(lines) == block
+        assert lines == leftover
+
+
+def test_parse_blocks():
+    """parse_blocks splits on empty lines and consumes all of its input."""
+    cases = [
+        # each case: (input lines, expected blocks)
+        # normal blocks
+        (['a: 42', 'b: 17', '', 'c: 19'], [['a: 42', 'b: 17'], ['c: 19']]),
+        # a single block
+        (['a: 42', 'b: 17'], [['a: 42', 'b: 17']]),
+        # no blocks
+        ([], []),
+    ]
+
+    for lines, blocks in cases:
+        assert parse_blocks(lines) == blocks
+        assert lines == []
+
+
+def test_first_key():
+    """first_key skips comments and drops one leading quote."""
+    tail = ['b: 17', '', 'c: 19']
+
+    # first line
+    assert first_key(['a: 42'] + tail) == 'a: 42'
+
+    # second line
+    assert first_key(['# some comment', 'a: 42'] + tail) == 'a: 42'
+
+    # second line with quotes: only the opening quote is removed
+    assert first_key(['# some comment', '"a": 42'] + tail) == 'a": 42'
+
+    # no lines
+    lines = []
+    assert first_key(lines) is None
+
+
+@pytest.mark.parametrize('bad_lines,good_lines,_', TEST_SORTS)
+def test_sort(bad_lines, good_lines, _):  # `_`: main() result, unused here
+    assert sort(bad_lines) == good_lines  # sort() returns the ordered lines