New hook 'destroyed-symlinks' to detect symlinks which are changed to regular files with a content of a path which that symlink was pointing to; move zsplit to util
diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
index a47f733..fa617b9 100644
--- a/.pre-commit-hooks.yaml
+++ b/.pre-commit-hooks.yaml
@@ -100,6 +100,12 @@
     entry: debug-statement-hook
     language: python
     types: [python]
+-   id: destroyed-symlinks
+    name: Detect Destroyed Symlinks
+    description: Detects symlinks which are changed to regular files with a content of a path which that symlink was pointing to.
+    entry: destroyed-symlinks
+    language: python
+    types: [file]
 -   id: detect-aws-credentials
     name: Detect AWS Credentials
     description: Detects *your* aws credentials from the aws cli credentials file
diff --git a/README.md b/README.md
index 2a76268..3a87e9c 100644
--- a/README.md
+++ b/README.md
@@ -87,6 +87,12 @@
 #### `debug-statements`
 Check for debugger imports and py37+ `breakpoint()` calls in python source.
 
+#### `destroyed-symlinks`
+Detects symlinks which are changed to regular files with a content of a path
+which that symlink was pointing to.
+This usually happens on Windows when a user clones a repository that has
+symlinks but they do not have the permission to create symlinks.
+
 #### `detect-aws-credentials`
 Checks for the existence of AWS secrets that you have set up with the AWS CLI.
 The following arguments are available:
diff --git a/pre_commit_hooks/check_executables_have_shebangs.py b/pre_commit_hooks/check_executables_have_shebangs.py
index a02d2a9..2d2bd7d 100644
--- a/pre_commit_hooks/check_executables_have_shebangs.py
+++ b/pre_commit_hooks/check_executables_have_shebangs.py
@@ -8,18 +8,11 @@
 from typing import Set
 
 from pre_commit_hooks.util import cmd_output
+from pre_commit_hooks.util import zsplit
 
 EXECUTABLE_VALUES = frozenset(('1', '3', '5', '7'))
 
 
-def zsplit(s: str) -> List[str]:
-    s = s.strip('\0')
-    if s:
-        return s.split('\0')
-    else:
-        return []
-
-
 def check_executables(paths: List[str]) -> int:
     if sys.platform == 'win32':  # pragma: win32 cover
         return _check_git_filemode(paths)
diff --git a/pre_commit_hooks/destroyed_symlinks.py b/pre_commit_hooks/destroyed_symlinks.py
new file mode 100755
index 0000000..cfaf4e5
--- /dev/null
+++ b/pre_commit_hooks/destroyed_symlinks.py
@@ -0,0 +1,96 @@
+import argparse
+import shlex
+import subprocess
+from typing import List
+from typing import Optional
+from typing import Sequence
+
+from pre_commit_hooks.util import cmd_output
+from pre_commit_hooks.util import zsplit
+
+ORDINARY_CHANGED_ENTRIES_MARKER = '1'
+PERMS_LINK = '120000'
+PERMS_NONEXIST = '000000'
+
+
+def find_destroyed_symlinks(files: Sequence[str]) -> List[str]:
+    destroyed_links: List[str] = []
+    if not files:
+        return destroyed_links
+    for line in zsplit(
+        cmd_output('git', 'status', '--porcelain=v2', '-z', '--', *files),
+    ):
+        splitted = line.split(' ')
+        if splitted and splitted[0] == ORDINARY_CHANGED_ENTRIES_MARKER:
+            # https://git-scm.com/docs/git-status#_changed_tracked_entries
+            (
+                _, _, _,
+                mode_HEAD,
+                mode_index,
+                _,
+                hash_HEAD,
+                hash_index,
+                *path_splitted,
+            ) = splitted
+            path = ' '.join(path_splitted)
+            if (
+                    mode_HEAD == PERMS_LINK and
+                    mode_index != PERMS_LINK and
+                    mode_index != PERMS_NONEXIST
+            ):
+                if hash_HEAD == hash_index:
+                    # if old and new hashes are equal, it's not needed to check
+                    # anything more, we've found a destroyed symlink for sure
+                    destroyed_links.append(path)
+                else:
+                    # if old and new hashes are *not* equal, it doesn't mean
+                    # that everything is OK - new file may be altered
+                    # by something like trailing-whitespace and/or
+                    # mixed-line-ending hooks so we need to go deeper
+                    SIZE_CMD = ('git', 'cat-file', '-s')
+                    size_index = int(cmd_output(*SIZE_CMD, hash_index).strip())
+                    size_HEAD = int(cmd_output(*SIZE_CMD, hash_HEAD).strip())
+
+                    # in the worst case new file may have CRLF added
+                    # so check content only if new file is bigger
+                    # not more than 2 bytes compared to the old one
+                    if size_index <= size_HEAD + 2:
+                        head_content = subprocess.check_output(
+                            ('git', 'cat-file', '-p', hash_HEAD),
+                        ).rstrip()
+                        index_content = subprocess.check_output(
+                            ('git', 'cat-file', '-p', hash_index),
+                        ).rstrip()
+                        if head_content == index_content:
+                            destroyed_links.append(path)
+    return destroyed_links
+
+
+def main(argv: Optional[Sequence[str]] = None) -> int:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filenames', nargs='*', help='Filenames to check.')
+    args = parser.parse_args(argv)
+    destroyed_links = find_destroyed_symlinks(files=args.filenames)
+    if destroyed_links:
+        print('Destroyed symlinks:')
+        for destroyed_link in destroyed_links:
+            print(f'- {destroyed_link}')
+        print('You should unstage affected files:')
+        print(
+            '\tgit reset HEAD -- {}'.format(
+                ' '.join(shlex.quote(link) for link in destroyed_links),
+            ),
+        )
+        print(
+            'And retry commit. As a long term solution '
+            'you may try to explicitly tell git that your '
+            'environment does not support symlinks:',
+        )
+        print('\tgit config core.symlinks false')
+        return 1
+    else:
+        return 0
+
+
+if __name__ == '__main__':
+    exit(main())
diff --git a/pre_commit_hooks/util.py b/pre_commit_hooks/util.py
index e04b015..402e33e 100644
--- a/pre_commit_hooks/util.py
+++ b/pre_commit_hooks/util.py
@@ -1,5 +1,6 @@
 import subprocess
 from typing import Any
+from typing import List
 from typing import Optional
 from typing import Set
 
@@ -22,3 +23,11 @@
     if retcode is not None and proc.returncode != retcode:
         raise CalledProcessError(cmd, retcode, proc.returncode, stdout, stderr)
     return stdout
+
+
+def zsplit(s: str) -> List[str]:
+    s = s.strip('\0')
+    if s:
+        return s.split('\0')
+    else:
+        return []
diff --git a/setup.cfg b/setup.cfg
index 3c401fc..e2cad1a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -44,6 +44,7 @@
     check-xml = pre_commit_hooks.check_xml:main
     check-yaml = pre_commit_hooks.check_yaml:main
     debug-statement-hook = pre_commit_hooks.debug_statement_hook:main
+    destroyed-symlinks = pre_commit_hooks.destroyed_symlinks:main
     detect-aws-credentials = pre_commit_hooks.detect_aws_credentials:main
     detect-private-key = pre_commit_hooks.detect_private_key:main
     double-quote-string-fixer = pre_commit_hooks.string_fixer:main
diff --git a/tests/check_executables_have_shebangs_test.py b/tests/check_executables_have_shebangs_test.py
index 7046081..5703ede 100644
--- a/tests/check_executables_have_shebangs_test.py
+++ b/tests/check_executables_have_shebangs_test.py
@@ -102,16 +102,6 @@
         assert check_executables_have_shebangs._check_git_filemode(files) == 1
 
 
-@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0'))
-def test_check_zsplits_correctly(out):
-    assert check_executables_have_shebangs.zsplit(out) == ['f1', 'f2']
-
-
-@pytest.mark.parametrize('out', ('\0\0', '\0', ''))
-def test_check_zsplit_returns_empty(out):
-    assert check_executables_have_shebangs.zsplit(out) == []
-
-
 @pytest.mark.parametrize(
     ('content', 'mode', 'expected'),
     (
diff --git a/tests/destroyed_symlinks_test.py b/tests/destroyed_symlinks_test.py
new file mode 100644
index 0000000..d2c9031
--- /dev/null
+++ b/tests/destroyed_symlinks_test.py
@@ -0,0 +1,74 @@
+import os
+import subprocess
+
+import pytest
+
+from pre_commit_hooks.destroyed_symlinks import find_destroyed_symlinks
+from pre_commit_hooks.destroyed_symlinks import main
+
+TEST_SYMLINK = 'test_symlink'
+TEST_SYMLINK_TARGET = '/doesnt/really/matters'
+TEST_FILE = 'test_file'
+TEST_FILE_RENAMED = f'{TEST_FILE}_renamed'
+
+
+@pytest.fixture
+def repo_with_destroyed_symlink(tmpdir):
+    source_repo = tmpdir.join('src')
+    os.makedirs(source_repo, exist_ok=True)
+    test_repo = tmpdir.join('test')
+    with source_repo.as_cwd():
+        subprocess.check_call(('git', 'init'))
+        os.symlink(TEST_SYMLINK_TARGET, TEST_SYMLINK)
+        with open(TEST_FILE, 'w') as f:
+            print('some random content', file=f)
+        subprocess.check_call(('git', 'add', '.'))
+        subprocess.check_call(
+            ('git', 'commit', '--no-gpg-sign', '-m', 'initial'),
+        )
+        assert b'120000 ' in subprocess.check_output(
+            ('git', 'cat-file', '-p', 'HEAD^{tree}'),
+        )
+    subprocess.check_call(
+        ('git', '-c', 'core.symlinks=false', 'clone', source_repo, test_repo),
+    )
+    with test_repo.as_cwd():
+        subprocess.check_call(
+            ('git', 'config', '--local', 'core.symlinks', 'true'),
+        )
+        subprocess.check_call(('git', 'mv', TEST_FILE, TEST_FILE_RENAMED))
+    assert not os.path.islink(test_repo.join(TEST_SYMLINK))
+    yield test_repo
+
+
+def test_find_destroyed_symlinks(repo_with_destroyed_symlink):
+    with repo_with_destroyed_symlink.as_cwd():
+        assert find_destroyed_symlinks([]) == []
+        assert main([]) == 0
+
+        subprocess.check_call(('git', 'add', TEST_SYMLINK))
+        assert find_destroyed_symlinks([TEST_SYMLINK]) == [TEST_SYMLINK]
+        assert find_destroyed_symlinks([]) == []
+        assert main([]) == 0
+        assert find_destroyed_symlinks([TEST_FILE_RENAMED, TEST_FILE]) == []
+        ALL_STAGED = [TEST_SYMLINK, TEST_FILE_RENAMED]
+        assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK]
+        assert main(ALL_STAGED) != 0
+
+        with open(TEST_SYMLINK, 'a') as f:
+            print(file=f)  # add trailing newline
+        subprocess.check_call(['git', 'add', TEST_SYMLINK])
+        assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK]
+        assert main(ALL_STAGED) != 0
+
+        with open(TEST_SYMLINK, 'w') as f:
+            print('0' * len(TEST_SYMLINK_TARGET), file=f)
+        subprocess.check_call(('git', 'add', TEST_SYMLINK))
+        assert find_destroyed_symlinks(ALL_STAGED) == []
+        assert main(ALL_STAGED) == 0
+
+        with open(TEST_SYMLINK, 'w') as f:
+            print('0' * (len(TEST_SYMLINK_TARGET) + 3), file=f)
+        subprocess.check_call(('git', 'add', TEST_SYMLINK))
+        assert find_destroyed_symlinks(ALL_STAGED) == []
+        assert main(ALL_STAGED) == 0
diff --git a/tests/util_test.py b/tests/util_test.py
index b42ee6f..7f48816 100644
--- a/tests/util_test.py
+++ b/tests/util_test.py
@@ -2,6 +2,7 @@
 
 from pre_commit_hooks.util import CalledProcessError
 from pre_commit_hooks.util import cmd_output
+from pre_commit_hooks.util import zsplit
 
 
 def test_raises_on_error():
@@ -12,3 +13,13 @@
 def test_output():
     ret = cmd_output('sh', '-c', 'echo hi')
     assert ret == 'hi\n'
+
+
+@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0'))
+def test_check_zsplits_str_correctly(out):
+    assert zsplit(out) == ['f1', 'f2']
+
+
+@pytest.mark.parametrize('out', ('\0\0', '\0', ''))
+def test_check_zsplit_returns_empty(out):
+    assert zsplit(out) == []