Add check-docstring-first hook.
diff --git a/README.md b/README.md
index 8d17026..08a3bcc 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,8 @@
### Hooks available
- `autopep8-wrapper` - Runs autopep8 over python source.
-' `check-added-large-files` - Prevent giant files from being committed.
+- `check-added-large-files` - Prevent giant files from being committed.
+- `check-docstring-first` - Checks for a common error of defining a docstring after code.
- `check-json` - Attempts to load all json files to verify syntax.
- `check-yaml` - Attempts to load all yaml files to verify syntax.
- `debug-statements` - Check for pdb / ipdb / pudb statements in code.
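For context, the mistake the new `check-docstring-first` hook reports looks like the snippet below. This is an illustrative example only (not part of the diff), mirroring the cases exercised in the new tests:

```python
# Flagged by check-docstring-first: module code appears before the docstring.
from __future__ import unicode_literals

"This string sits at column 0 after an import, so the hook reports it and exits 1."
```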
diff --git a/hooks.yaml b/hooks.yaml
index 3eb0e94..fb9cc82 100644
--- a/hooks.yaml
+++ b/hooks.yaml
@@ -7,11 +7,17 @@
args: [-i]
- id: check-added-large-files
name: Check for added large files
- language: python
- entry: check-added-large-files
description: Prevent giant files from being committed
+ entry: check-added-large-files
+ language: python
# Match all files
files: ''
+- id: check-docstring-first
+ name: Check docstring is first
+  description: Checks for a common error of defining a docstring after code.
+ entry: check-docstring-first
+ language: python
+ files: \.py$
- id: check-json
name: Check JSON
description: This hook checks json files for parseable syntax.
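The new entry's `files: \.py$` pattern limits the hook to Python sources, whereas `check-added-large-files` keeps `files: ''` to match every file. A small sketch of what such a regex admits (illustrative only, not how pre-commit itself filters paths):

```python
import re

# files: \.py$  -- only filenames ending in .py are passed to the hook.
py_only = re.compile(r'\.py$')
for path in ('pre_commit_hooks/check_docstring_first.py', 'hooks.yaml', 'README.md'):
    print(path, 'matched' if py_only.search(path) else 'skipped')
```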
diff --git a/pre_commit_hooks/check_docstring_first.py b/pre_commit_hooks/check_docstring_first.py
new file mode 100644
index 0000000..da5425d
--- /dev/null
+++ b/pre_commit_hooks/check_docstring_first.py
@@ -0,0 +1,63 @@
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+import argparse
+import io
+import tokenize
+
+
+NON_CODE_TOKENS = frozenset((
+ tokenize.COMMENT, tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL,
+))
+
+
+def check_docstring_first(src, filename='<unknown>'):
+ """Returns nonzero if the source has what looks like a docstring that is
+ not at the beginning of the source.
+
+ A string will be considered a docstring if it is a STRING token with a
+ col offset of 0.
+ """
+ found_docstring_line = None
+ found_code_line = None
+
+ tok_gen = tokenize.generate_tokens(io.StringIO(src).readline)
+ for tok_type, _, (sline, scol), _, _ in tok_gen:
+ # Looks like a docstring!
+ if tok_type == tokenize.STRING and scol == 0:
+ if found_docstring_line is not None:
+ print(
+ '{0}:{1} Multiple module docstrings '
+ '(first docstring on line {2}).'.format(
+ filename, sline, found_docstring_line,
+ )
+ )
+ return 1
+ elif found_code_line is not None:
+ print(
+ '{0}:{1} Module docstring appears after code '
+ '(code seen on line {2}).'.format(
+ filename, sline, found_code_line,
+ )
+ )
+ return 1
+ else:
+ found_docstring_line = sline
+ elif tok_type not in NON_CODE_TOKENS and found_code_line is None:
+ found_code_line = sline
+
+ return 0
+
+
+def main(argv=None):
+ parser = argparse.ArgumentParser()
+ parser.add_argument('filenames', nargs='*')
+ args = parser.parse_args(argv)
+
+ retv = 0
+
+ for filename in args.filenames:
+ contents = io.open(filename).read()
+ retv |= check_docstring_first(contents, filename=filename)
+
+ return retv
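The heuristic in `check_docstring_first` is that any STRING token starting at column 0 is treated as a module docstring, and any earlier token outside `NON_CODE_TOKENS` counts as code. A minimal sketch (not part of the commit) that prints the token stream for an offending source, showing why the import is seen as code first:

```python
import io
import tokenize

src = 'import os\n"docstring"\n'
for tok_type, tok_str, (line, col), _, _ in tokenize.generate_tokens(io.StringIO(src).readline):
    # The NAME token for `import` arrives before the STRING at column 0 on
    # line 2, which is exactly the case the hook reports and returns 1 for.
    print(tokenize.tok_name[tok_type], repr(tok_str), line, col)
```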
diff --git a/setup.py b/setup.py
index 3681e65..26991de 100644
--- a/setup.py
+++ b/setup.py
@@ -37,8 +37,9 @@
entry_points={
'console_scripts': [
'autopep8-wrapper = pre_commit_hooks.autopep8_wrapper:main',
- 'check-json = pre_commit_hooks.check_json:check_json',
'check-added-large-files = pre_commit_hooks.check_added_large_files:main',
+ 'check-docstring-first = pre_commit_hooks.check_docstring_first:main',
+ 'check-json = pre_commit_hooks.check_json:check_json',
'check-yaml = pre_commit_hooks.check_yaml:check_yaml',
'debug-statement-hook = pre_commit_hooks.debug_statement_hook:debug_statement_hook',
'end-of-file-fixer = pre_commit_hooks.end_of_file_fixer:end_of_file_fixer',
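The new console script entry point simply wires `check-docstring-first` to `main()`, which takes a list of filenames and returns a combined exit code. The same function can be driven directly from Python, as the tests below do; a hedged usage sketch (the file name is a placeholder, not from the source):

```python
from pre_commit_hooks.check_docstring_first import main

# Returns 0 when every file's docstring (if any) comes first, 1 otherwise.
exit_code = main(['some_module.py'])  # 'some_module.py' is hypothetical
print(exit_code)
```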
diff --git a/tests/check_docstring_first_test.py b/tests/check_docstring_first_test.py
new file mode 100644
index 0000000..ecff0e3
--- /dev/null
+++ b/tests/check_docstring_first_test.py
@@ -0,0 +1,67 @@
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+import io
+
+import pytest
+
+from pre_commit_hooks.check_docstring_first import check_docstring_first
+from pre_commit_hooks.check_docstring_first import main
+
+
+# contents, expected, expected_out
+TESTS = (
+ # trivial
+ ('', 0, ''),
+ # Acceptable
+ ('"foo"', 0, ''),
+    # Docstring after code
+ (
+ 'from __future__ import unicode_literals\n'
+ '"foo"\n',
+ 1,
+ '{filename}:2 Module docstring appears after code '
+ '(code seen on line 1).\n'
+ ),
+ # Test double docstring
+ (
+ '"The real docstring"\n'
+ 'from __future__ import absolute_import\n'
+ '"fake docstring"\n',
+ 1,
+ '{filename}:3 Multiple module docstrings '
+ '(first docstring on line 1).\n'
+ ),
+ # Test multiple lines of code above
+ (
+ 'import os\n'
+ 'import sys\n'
+ '"docstring"\n',
+ 1,
+ '{filename}:3 Module docstring appears after code '
+ '(code seen on line 1).\n',
+ ),
+ # String literals in expressions are ok.
+ ('x = "foo"\n', 0, ''),
+)
+
+
+all_tests = pytest.mark.parametrize(
+ ('contents', 'expected', 'expected_out'), TESTS,
+)
+
+
+@all_tests
+def test_unit(capsys, contents, expected, expected_out):
+ assert check_docstring_first(contents) == expected
+ assert capsys.readouterr()[0] == expected_out.format(filename='<unknown>')
+
+
+@all_tests
+def test_integration(tmpdir, capsys, contents, expected, expected_out):
+ tmpfilename = tmpdir.join('test.py').strpath
+ with io.open(tmpfilename, 'w') as tmpfile:
+ tmpfile.write(contents)
+
+ assert main([tmpfilename]) == expected
+ assert capsys.readouterr()[0] == expected_out.format(filename=tmpfilename)