Daniel Gallagher | b6eff3d | 2017-06-23 16:26:00 -0700 | [diff] [blame] | 1 | """Sort a simple YAML file, keeping blocks of comments and definitions |
| 2 | together. |
| 3 | |
| 4 | We assume a strict subset of YAML that looks like: |
| 5 | |
| 6 | # block of header comments |
| 7 | # here that should always |
| 8 | # be at the top of the file |
| 9 | |
| 10 | # optional comments |
| 11 | # can go here |
| 12 | key: value |
| 13 | key: value |
| 14 | |
| 15 | key: value |
| 16 | |
| 17 | In other words, we don't sort deeper than the top layer, and might corrupt |
| 18 | complicated YAML files. |
| 19 | """ |
Anthony Sottile | 8f61529 | 2022-01-15 19:24:05 -0500 | [diff] [blame] | 20 | from __future__ import annotations |
| 21 | |
Daniel Gallagher | b6eff3d | 2017-06-23 16:26:00 -0700 | [diff] [blame] | 22 | import argparse |
Anthony Sottile | 030bfac | 2019-01-31 19:19:10 -0800 | [diff] [blame] | 23 | from typing import Sequence |
Daniel Gallagher | b6eff3d | 2017-06-23 16:26:00 -0700 | [diff] [blame] | 24 | |
| 25 | |
# Quote characters that may open a quoted top-level key; first_key() drops
# one of these from the start of a line when computing a block's sort key.
QUOTES = ["'", '"']
| 27 | |
| 28 | |
def sort(lines: list[str]) -> list[str]:
    """Return the lines of a simple YAML file with its blocks sorted.

    The leading run of comment lines (the header) stays at the top.  The
    remaining blank-line-separated blocks are ordered by `first_key` and
    written back with exactly one empty line between consecutive blocks
    (and between the header and the first block).

    :param lines: array of strings (without newlines); not modified
    :return: sorted array of strings
    """
    # the parsing helpers consume their input, so work on a private copy
    remaining = list(lines)
    result = parse_block(remaining, header=True)

    ordered_blocks = parse_blocks(remaining)
    ordered_blocks.sort(key=first_key)

    for chunk in ordered_blocks:
        # separate from whatever has already been emitted
        if result:
            result.append('')
        result += chunk

    return result
| 45 | |
| 46 | |
def parse_block(lines: list[str], header: bool = False) -> list[str]:
    """Parse and return a single block, removing it from the start of `lines`.

    A block always ends at the first empty line.  When parsing a header
    block it additionally ends at the first line that is not a comment
    (does not start with ``#``).  The terminating line itself is left in
    `lines`.

    :param lines: list of lines; the returned block is removed from its
        front in place
    :param header: whether we are parsing a header block
    :return: list of lines that form the single block (possibly empty)
    """
    # Find where the block ends with a single scan instead of popping one
    # element at a time: `list.pop(0)` shifts the whole list on every call.
    end = 0
    for line in lines:
        if not line or (header and not line.startswith('#')):
            break
        end += 1

    block_lines = lines[:end]
    # drop the consumed prefix in one operation, mutating the caller's list
    del lines[:end]
    return block_lines
| 61 | |
| 62 | |
def parse_blocks(lines: list[str]) -> list[list[str]]:
    """Parse and return all blocks, consuming every entry of `lines`.

    A block is a maximal run of consecutive non-empty lines; empty lines
    only act as separators and are discarded.

    :param lines: list of lines; emptied in place once parsing is done
    :return: list of blocks, where each block is a list of lines
    """
    blocks: list[list[str]] = []
    current: list[str] = []

    # Single pass over the input; repeatedly popping from the front of a
    # list would make this quadratic in the number of lines.
    for line in lines:
        if line:
            current.append(line)
        elif current:
            blocks.append(current)
            current = []
    if current:
        blocks.append(current)

    # callers rely on the input having been consumed
    lines.clear()
    return blocks
| 78 | |
| 79 | |
def first_key(lines: list[str]) -> str:
    """Return the string used as the sort key of a block.

    The sort key is the first line of the block that is not a comment,
    with one leading quote character (see `QUOTES`) removed if present.
    The whole remaining line is returned, not just the YAML key.  A block
    that contains nothing but comments yields the empty string.

    >>> first_key(['# some comment', "'foo': true"])
    "foo': true"
    >>> first_key(['# only a comment'])
    ''
    """
    for candidate in lines:
        if candidate.startswith('#'):
            continue
        # first non-comment line decides the key
        if candidate.startswith(tuple(QUOTES)):
            return candidate[1:]
        return candidate
    return ''
Daniel Gallagher | b6eff3d | 2017-06-23 16:26:00 -0700 | [diff] [blame] | 100 | |
| 101 | |
def main(argv: Sequence[str] | None = None) -> int:
    """Sort each given file in place and report whether anything changed.

    Every file is read with trailing whitespace stripped from each line,
    passed through `sort`, and rewritten only when the result differs from
    what was read.

    :param argv: command-line arguments; ``None`` lets argparse use
        ``sys.argv``
    :return: 1 if at least one file was rewritten, otherwise 0
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    args = parser.parse_args(argv)

    any_fixed = False

    for filename in args.filenames:
        with open(filename, 'r+') as f:
            original = [raw.rstrip() for raw in f.readlines()]
            ordered = sort(original)

            if ordered == original:
                continue

            print(f'Fixing file `{filename}`')
            # overwrite from the start, then cut off any leftover tail
            f.seek(0)
            f.write('\n'.join(ordered) + '\n')
            f.truncate()
            any_fixed = True

    return 1 if any_fixed else 0
| 122 | |
| 123 | |
if __name__ == '__main__':
    # When run as a script, exit with main()'s return value as the status.
    raise SystemExit(main())