Fix parsing of git output with unusual characters
On Windows, all files are "executable".
Therefore, to know if a file is supposed to be executed,
we check how its attributes were recorded by git:
we run a `git ls-files` command in a subprocess.
By default, this command outputs information
on multiple lines (file and their data separated by newlines).
When a file contains an unusual character,
the character is escaped with an integer sequence
(such as `\303\261`), and git wraps the whole filename
in double-quotes because of the backslashes.
It breaks the current code because we try to open
the filename containing the double-quotes:
it doesn't exist, of course.
Instead of trying to fix this special case by removing
the double-quotes, and breaking other cases
(a double-quote is a valid filename character on Linux),
we tell git to separate each item with the null character `\0`
instead of a new line `\n`, with the option `-z`.
With this option, git doesn't escape unusual characters
with integer sequence, so the output is fixed, and we
parse it by splitting on `\0` instead of `\n`.
Fixes #508.
diff --git a/tests/check_executables_have_shebangs_test.py b/tests/check_executables_have_shebangs_test.py
index 5895a2a..7046081 100644
--- a/tests/check_executables_have_shebangs_test.py
+++ b/tests/check_executables_have_shebangs_test.py
@@ -73,6 +73,21 @@
assert check_executables_have_shebangs._check_git_filemode(files) == 0
+def test_check_git_filemode_passing_unusual_characters(tmpdir):
+ with tmpdir.as_cwd():
+ cmd_output('git', 'init', '.')
+
+ f = tmpdir.join('mañana.txt')
+ f.write('#!/usr/bin/env bash')
+ f_path = str(f)
+ cmd_output('chmod', '+x', f_path)
+ cmd_output('git', 'add', f_path)
+ cmd_output('git', 'update-index', '--chmod=+x', f_path)
+
+ files = (f_path,)
+ assert check_executables_have_shebangs._check_git_filemode(files) == 0
+
+
def test_check_git_filemode_failing(tmpdir):
with tmpdir.as_cwd():
cmd_output('git', 'init', '.')
@@ -87,6 +102,16 @@
assert check_executables_have_shebangs._check_git_filemode(files) == 1
+@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0'))
+def test_check_zsplits_correctly(out):
+ assert check_executables_have_shebangs.zsplit(out) == ['f1', 'f2']
+
+
+@pytest.mark.parametrize('out', ('\0\0', '\0', ''))
+def test_check_zsplit_returns_empty(out):
+ assert check_executables_have_shebangs.zsplit(out) == []
+
+
@pytest.mark.parametrize(
('content', 'mode', 'expected'),
(