| #!/usr/bin/env python |
| """Sort a simple YAML file, keeping blocks of comments and definitions |
| together. |
| |
| We assume a strict subset of YAML that looks like: |
| |
| # block of header comments |
| # here that should always |
| # be at the top of the file |
| |
| # optional comments |
| # can go here |
| key: value |
| key: value |
| |
| key: value |
| |
| In other words, we don't sort deeper than the top layer, and might corrupt |
| complicated YAML files. |
| """ |
| from __future__ import print_function |
| |
| import argparse |
| from typing import List |
| from typing import Optional |
| from typing import Sequence |
| |
| |
# Quote characters that may open a quoted top-level YAML key. `first_key`
# drops one such leading character so quoted and unquoted keys sort together.
QUOTES = ["'", '"']
| |
| |
def sort(lines):  # type: (List[str]) -> List[str]
    """Return the lines of a simple YAML file with its blocks sorted.

    The leading run of comment lines (the header) stays at the top. Every
    following block is ordered by `first_key`, and blocks are separated
    from each other, and from a non-empty header, by one empty line.

    :param lines: lines of the file, without trailing newlines
    :return: a new list of lines; the input list is left untouched
    """
    # The parsing helpers consume their argument, so work on a private copy.
    remaining = list(lines)

    # Must run first: it strips the header comments off the front.
    result = parse_block(remaining, header=True)

    blocks = parse_blocks(remaining)
    blocks.sort(key=first_key)

    for block in blocks:
        # Only add a separator when something has already been emitted.
        if result:
            result.append('')
        result.extend(block)

    return result
| |
| |
def parse_block(lines, header=False):  # type: (List[str], bool) -> List[str]
    """Parse and return a single block, removing it from the start of `lines`.

    A block ends at the first empty line or at the end of `lines`. When
    parsing a header block it additionally ends at the first line that does
    not start with ``#``. The terminating line itself is not consumed.

    :param lines: list of lines; the returned block is deleted from its front
    :param header: whether we are parsing a header (comments-only) block
    :return: list of lines that form the single block (possibly empty)
    """
    # Measure the block first, then remove it in one operation. Popping one
    # line at a time from the front shifts the rest of the list on each pop.
    count = 0
    for line in lines:
        if not line:
            break
        if header and not line.startswith('#'):
            break
        count += 1

    block_lines = lines[:count]
    del lines[:count]
    return block_lines
| |
| |
def parse_blocks(lines):  # type: (List[str]) -> List[List[str]]
    """Split `lines` into blocks, consuming `lines` entirely.

    Empty lines act purely as separators and are discarded. Every run of
    non-empty lines is handed to `parse_block` and becomes one block.

    :param lines: list of lines; it is empty when this function returns
    :return: list of blocks, where each block is a list of lines
    """
    collected = []

    while lines:
        if lines[0]:
            # Non-empty line: parse_block removes the whole run it starts.
            collected.append(parse_block(lines))
        else:
            # Blank separator line: drop it and keep going.
            del lines[0]

    return collected
| |
| |
def first_key(lines):  # type: (List[str]) -> str
    """Return the string used to order a block when sorting.

    The sort key is the first line of the block that does not start with
    ``#``. If that line starts with one of the characters in `QUOTES`, that
    single leading character is removed. The rest of the line, including
    the value, is part of the key.

    >>> first_key(['# some comment', "'foo': true"])
    "foo': true"

    :param lines: the lines of one block
    :return: the sort key, or '' when the block has no non-comment line
    """
    key_line = next(
        (entry for entry in lines if not entry.startswith('#')),
        None,
    )
    if key_line is None:
        # Comment-only (or empty) block; not expected from real input.
        return ''
    if key_line.startswith(tuple(QUOTES)):
        return key_line[1:]
    return key_line
| |
| |
def main(argv=None):  # type: (Optional[Sequence[str]]) -> int
    """Sort each given file in place and report whether anything changed.

    Each file is read with trailing whitespace stripped from every line and
    passed through `sort`. If the result differs from the stripped lines,
    the file is overwritten with the sorted lines joined by newlines.

    :param argv: command-line arguments; None lets argparse use sys.argv
    :return: 1 if at least one file was rewritten, otherwise 0
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    filenames = parser.parse_args(argv).filenames

    exit_code = 0

    for filename in filenames:
        # 'r+' lets us read and then overwrite through the same handle.
        with open(filename, 'r+') as handle:
            original = [raw.rstrip() for raw in handle.readlines()]
            reordered = sort(original)

            if original == reordered:
                continue

            print("Fixing file `{filename}`".format(filename=filename))
            handle.seek(0)
            handle.write("\n".join(reordered) + "\n")
            # Drop leftover bytes if the new content is shorter.
            handle.truncate()
            exit_code = 1

    return exit_code
| |
| |
if __name__ == '__main__':
    # `exit` is added to builtins by the `site` module for interactive use
    # and is missing when site is disabled (e.g. `python -S`). Raising
    # SystemExit sets the same process exit status without relying on it.
    raise SystemExit(main())