Add new hook 'destroyed-symlinks' to detect symlinks that have been replaced by regular files whose content is the path the symlink pointed to; move zsplit to util
diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
index a47f733..fa617b9 100644
--- a/.pre-commit-hooks.yaml
+++ b/.pre-commit-hooks.yaml
@@ -100,6 +100,12 @@
entry: debug-statement-hook
language: python
types: [python]
+- id: destroyed-symlinks
+ name: Detect Destroyed Symlinks
+ description: Detects symlinks that have been replaced by regular files whose content is the path the symlink pointed to.
+ entry: destroyed-symlinks
+ language: python
+ types: [file]
- id: detect-aws-credentials
name: Detect AWS Credentials
description: Detects *your* aws credentials from the aws cli credentials file
diff --git a/README.md b/README.md
index 2a76268..3a87e9c 100644
--- a/README.md
+++ b/README.md
@@ -87,6 +87,12 @@
#### `debug-statements`
Check for debugger imports and py37+ `breakpoint()` calls in python source.
+#### `destroyed-symlinks`
+Detects symlinks that have been replaced by regular files whose content is
+the path the symlink pointed to.
+This usually happens on Windows when a user clones a repository that contains
+symlinks but does not have permission to create them.
+
#### `detect-aws-credentials`
Checks for the existence of AWS secrets that you have set up with the AWS CLI.
The following arguments are available:
diff --git a/pre_commit_hooks/check_executables_have_shebangs.py b/pre_commit_hooks/check_executables_have_shebangs.py
index a02d2a9..2d2bd7d 100644
--- a/pre_commit_hooks/check_executables_have_shebangs.py
+++ b/pre_commit_hooks/check_executables_have_shebangs.py
@@ -8,18 +8,11 @@
from typing import Set
from pre_commit_hooks.util import cmd_output
+from pre_commit_hooks.util import zsplit
EXECUTABLE_VALUES = frozenset(('1', '3', '5', '7'))
-def zsplit(s: str) -> List[str]:
- s = s.strip('\0')
- if s:
- return s.split('\0')
- else:
- return []
-
-
def check_executables(paths: List[str]) -> int:
if sys.platform == 'win32': # pragma: win32 cover
return _check_git_filemode(paths)
diff --git a/pre_commit_hooks/destroyed_symlinks.py b/pre_commit_hooks/destroyed_symlinks.py
new file mode 100755
index 0000000..cfaf4e5
--- /dev/null
+++ b/pre_commit_hooks/destroyed_symlinks.py
@@ -0,0 +1,96 @@
+import argparse
+import shlex
+import subprocess
+from typing import List
+from typing import Optional
+from typing import Sequence
+
+from pre_commit_hooks.util import cmd_output
+from pre_commit_hooks.util import zsplit
+
+ORDINARY_CHANGED_ENTRIES_MARKER = '1'
+PERMS_LINK = '120000'
+PERMS_NONEXIST = '000000'
+
+
+def find_destroyed_symlinks(files: Sequence[str]) -> List[str]:
+ destroyed_links: List[str] = []
+ if not files:
+ return destroyed_links
+ for line in zsplit(
+ cmd_output('git', 'status', '--porcelain=v2', '-z', '--', *files),
+ ):
+ splitted = line.split(' ')
+ if splitted and splitted[0] == ORDINARY_CHANGED_ENTRIES_MARKER:
+ # https://git-scm.com/docs/git-status#_changed_tracked_entries
+ (
+ _, _, _,
+ mode_HEAD,
+ mode_index,
+ _,
+ hash_HEAD,
+ hash_index,
+ *path_splitted,
+ ) = splitted
+ path = ' '.join(path_splitted)
+ if (
+ mode_HEAD == PERMS_LINK and
+ mode_index != PERMS_LINK and
+ mode_index != PERMS_NONEXIST
+ ):
+ if hash_HEAD == hash_index:
+ # if old and new hashes are equal, it's not needed to check
+ # anything more, we've found a destroyed symlink for sure
+ destroyed_links.append(path)
+ else:
+ # if old and new hashes are *not* equal, it doesn't mean
+ # that everything is OK - new file may be altered
+ # by something like trailing-whitespace and/or
+ # mixed-line-ending hooks so we need to go deeper
+ SIZE_CMD = ('git', 'cat-file', '-s')
+ size_index = int(cmd_output(*SIZE_CMD, hash_index).strip())
+ size_HEAD = int(cmd_output(*SIZE_CMD, hash_HEAD).strip())
+
+ # in the worst case new file may have CRLF added
+ # so check content only if new file is bigger
+ # not more than 2 bytes compared to the old one
+ if size_index <= size_HEAD + 2:
+ head_content = subprocess.check_output(
+ ('git', 'cat-file', '-p', hash_HEAD),
+ ).rstrip()
+ index_content = subprocess.check_output(
+ ('git', 'cat-file', '-p', hash_index),
+ ).rstrip()
+ if head_content == index_content:
+ destroyed_links.append(path)
+ return destroyed_links
+
+
+def main(argv: Optional[Sequence[str]] = None) -> int:
+ parser = argparse.ArgumentParser()
+ parser.add_argument('filenames', nargs='*', help='Filenames to check.')
+ args = parser.parse_args(argv)
+ destroyed_links = find_destroyed_symlinks(files=args.filenames)
+ if destroyed_links:
+ print('Destroyed symlinks:')
+ for destroyed_link in destroyed_links:
+ print(f'- {destroyed_link}')
+ print('You should unstage affected files:')
+ print(
+ '\tgit reset HEAD -- {}'.format(
+ ' '.join(shlex.quote(link) for link in destroyed_links),
+ ),
+ )
+ print(
+ 'And retry commit. As a long term solution '
+ 'you may try to explicitly tell git that your '
+ 'environment does not support symlinks:',
+ )
+ print('\tgit config core.symlinks false')
+ return 1
+ else:
+ return 0
+
+
+if __name__ == '__main__':
+ exit(main())
diff --git a/pre_commit_hooks/util.py b/pre_commit_hooks/util.py
index e04b015..402e33e 100644
--- a/pre_commit_hooks/util.py
+++ b/pre_commit_hooks/util.py
@@ -1,5 +1,6 @@
import subprocess
from typing import Any
+from typing import List
from typing import Optional
from typing import Set
@@ -22,3 +23,11 @@
if retcode is not None and proc.returncode != retcode:
raise CalledProcessError(cmd, retcode, proc.returncode, stdout, stderr)
return stdout
+
+
+def zsplit(s: str) -> List[str]:
+ s = s.strip('\0')
+ if s:
+ return s.split('\0')
+ else:
+ return []
diff --git a/setup.cfg b/setup.cfg
index 3c401fc..e2cad1a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -44,6 +44,7 @@
check-xml = pre_commit_hooks.check_xml:main
check-yaml = pre_commit_hooks.check_yaml:main
debug-statement-hook = pre_commit_hooks.debug_statement_hook:main
+ destroyed-symlinks = pre_commit_hooks.destroyed_symlinks:main
detect-aws-credentials = pre_commit_hooks.detect_aws_credentials:main
detect-private-key = pre_commit_hooks.detect_private_key:main
double-quote-string-fixer = pre_commit_hooks.string_fixer:main
diff --git a/tests/check_executables_have_shebangs_test.py b/tests/check_executables_have_shebangs_test.py
index 7046081..5703ede 100644
--- a/tests/check_executables_have_shebangs_test.py
+++ b/tests/check_executables_have_shebangs_test.py
@@ -102,16 +102,6 @@
assert check_executables_have_shebangs._check_git_filemode(files) == 1
-@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0'))
-def test_check_zsplits_correctly(out):
- assert check_executables_have_shebangs.zsplit(out) == ['f1', 'f2']
-
-
-@pytest.mark.parametrize('out', ('\0\0', '\0', ''))
-def test_check_zsplit_returns_empty(out):
- assert check_executables_have_shebangs.zsplit(out) == []
-
-
@pytest.mark.parametrize(
('content', 'mode', 'expected'),
(
diff --git a/tests/destroyed_symlinks_test.py b/tests/destroyed_symlinks_test.py
new file mode 100644
index 0000000..d2c9031
--- /dev/null
+++ b/tests/destroyed_symlinks_test.py
@@ -0,0 +1,74 @@
+import os
+import subprocess
+
+import pytest
+
+from pre_commit_hooks.destroyed_symlinks import find_destroyed_symlinks
+from pre_commit_hooks.destroyed_symlinks import main
+
+TEST_SYMLINK = 'test_symlink'
+TEST_SYMLINK_TARGET = '/doesnt/really/matters'
+TEST_FILE = 'test_file'
+TEST_FILE_RENAMED = f'{TEST_FILE}_renamed'
+
+
+@pytest.fixture
+def repo_with_destroyed_symlink(tmpdir):
+ source_repo = tmpdir.join('src')
+ os.makedirs(source_repo, exist_ok=True)
+ test_repo = tmpdir.join('test')
+ with source_repo.as_cwd():
+ subprocess.check_call(('git', 'init'))
+ os.symlink(TEST_SYMLINK_TARGET, TEST_SYMLINK)
+ with open(TEST_FILE, 'w') as f:
+ print('some random content', file=f)
+ subprocess.check_call(('git', 'add', '.'))
+ subprocess.check_call(
+ ('git', 'commit', '--no-gpg-sign', '-m', 'initial'),
+ )
+ assert b'120000 ' in subprocess.check_output(
+ ('git', 'cat-file', '-p', 'HEAD^{tree}'),
+ )
+ subprocess.check_call(
+ ('git', '-c', 'core.symlinks=false', 'clone', source_repo, test_repo),
+ )
+ with test_repo.as_cwd():
+ subprocess.check_call(
+ ('git', 'config', '--local', 'core.symlinks', 'true'),
+ )
+ subprocess.check_call(('git', 'mv', TEST_FILE, TEST_FILE_RENAMED))
+ assert not os.path.islink(test_repo.join(TEST_SYMLINK))
+ yield test_repo
+
+
+def test_find_destroyed_symlinks(repo_with_destroyed_symlink):
+ with repo_with_destroyed_symlink.as_cwd():
+ assert find_destroyed_symlinks([]) == []
+ assert main([]) == 0
+
+ subprocess.check_call(('git', 'add', TEST_SYMLINK))
+ assert find_destroyed_symlinks([TEST_SYMLINK]) == [TEST_SYMLINK]
+ assert find_destroyed_symlinks([]) == []
+ assert main([]) == 0
+ assert find_destroyed_symlinks([TEST_FILE_RENAMED, TEST_FILE]) == []
+ ALL_STAGED = [TEST_SYMLINK, TEST_FILE_RENAMED]
+ assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK]
+ assert main(ALL_STAGED) != 0
+
+ with open(TEST_SYMLINK, 'a') as f:
+ print(file=f) # add trailing newline
+ subprocess.check_call(['git', 'add', TEST_SYMLINK])
+ assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK]
+ assert main(ALL_STAGED) != 0
+
+ with open(TEST_SYMLINK, 'w') as f:
+ print('0' * len(TEST_SYMLINK_TARGET), file=f)
+ subprocess.check_call(('git', 'add', TEST_SYMLINK))
+ assert find_destroyed_symlinks(ALL_STAGED) == []
+ assert main(ALL_STAGED) == 0
+
+ with open(TEST_SYMLINK, 'w') as f:
+ print('0' * (len(TEST_SYMLINK_TARGET) + 3), file=f)
+ subprocess.check_call(('git', 'add', TEST_SYMLINK))
+ assert find_destroyed_symlinks(ALL_STAGED) == []
+ assert main(ALL_STAGED) == 0
diff --git a/tests/util_test.py b/tests/util_test.py
index b42ee6f..7f48816 100644
--- a/tests/util_test.py
+++ b/tests/util_test.py
@@ -2,6 +2,7 @@
from pre_commit_hooks.util import CalledProcessError
from pre_commit_hooks.util import cmd_output
+from pre_commit_hooks.util import zsplit
def test_raises_on_error():
@@ -12,3 +13,13 @@
def test_output():
ret = cmd_output('sh', '-c', 'echo hi')
assert ret == 'hi\n'
+
+
+@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0'))
+def test_check_zsplits_str_correctly(out):
+ assert zsplit(out) == ['f1', 'f2']
+
+
+@pytest.mark.parametrize('out', ('\0\0', '\0', ''))
+def test_check_zsplit_returns_empty(out):
+ assert zsplit(out) == []