Merge pull request #184 from ushuz/pretty-json-no-ensure-ascii

Add --no-ensure-ascii option to pretty-format-json hook, which keeps
non-ASCII characters as-is instead of converting them to \uXXXX escape sequences
diff --git a/pre_commit_hooks/pretty_format_json.py b/pre_commit_hooks/pretty_format_json.py
index bf1ccb1..c1ee59d 100644
--- a/pre_commit_hooks/pretty_format_json.py
+++ b/pre_commit_hooks/pretty_format_json.py
@@ -1,13 +1,15 @@
 from __future__ import print_function
 
 import argparse
+import io
 import sys
 from collections import OrderedDict
 
 import simplejson
+import six
 
 
-def _get_pretty_format(contents, indent, sort_keys=True, top_keys=[]):
+def _get_pretty_format(contents, indent, ensure_ascii=True, sort_keys=True, top_keys=[]):
     def pairs_first(pairs):
         before = [pair for pair in pairs if pair[0] in top_keys]
         before = sorted(before, key=lambda x: top_keys.index(x[0]))
@@ -15,18 +17,19 @@
         if sort_keys:
             after = sorted(after, key=lambda x: x[0])
         return OrderedDict(before + after)
-    return simplejson.dumps(
+    return six.text_type(simplejson.dumps(
         simplejson.loads(
             contents,
             object_pairs_hook=pairs_first,
         ),
-        indent=indent
-    ) + "\n"  # dumps don't end with a newline
+        indent=indent,
+        ensure_ascii=ensure_ascii
+    )) + "\n"  # dumps don't end with a newline
 
 
-def _autofix(filename, new_contents):
+def _autofix(filename, new_contents, encoding=None):
     print("Fixing file {}".format(filename))
-    with open(filename, 'w') as f:
+    with io.open(filename, 'w', encoding=encoding) as f:
         f.write(new_contents)
 
 
@@ -70,6 +73,13 @@
         help='String used as delimiter for one indentation level',
     )
     parser.add_argument(
+        '--no-ensure-ascii',
+        action='store_true',
+        dest='no_ensure_ascii',
+        default=False,
+        help='Do NOT convert non-ASCII characters to Unicode escape sequences (\\uXXXX)',
+    )
+    parser.add_argument(
         '--no-sort-keys',
         action='store_true',
         dest='no_sort_keys',
@@ -90,20 +100,23 @@
     status = 0
 
     for json_file in args.filenames:
-        with open(json_file) as f:
+        with io.open(json_file, encoding='utf-8') as f:
             contents = f.read()
 
         try:
             pretty_contents = _get_pretty_format(
-                contents, args.indent, sort_keys=not args.no_sort_keys,
-                top_keys=args.top_keys
+                contents, args.indent, ensure_ascii=not args.no_ensure_ascii,
+                sort_keys=not args.no_sort_keys, top_keys=args.top_keys
             )
 
             if contents != pretty_contents:
                 print("File {} is not pretty-formatted".format(json_file))
 
                 if args.autofix:
-                    _autofix(json_file, pretty_contents)
+                    _autofix(
+                        json_file, pretty_contents,
+                        encoding='utf-8' if args.no_ensure_ascii else None
+                    )
 
                 status = 1
 
diff --git a/testing/resources/non_ascii_pretty_formatted_json.json b/testing/resources/non_ascii_pretty_formatted_json.json
new file mode 100644
index 0000000..05d0d00
--- /dev/null
+++ b/testing/resources/non_ascii_pretty_formatted_json.json
@@ -0,0 +1,10 @@
+{
+  "alist": [
+    2,
+    34,
+    234
+  ],
+  "blah": null,
+  "foo": "bar",
+  "non_ascii": "中文にほんご한국어"
+}
diff --git a/tests/pretty_format_json_test.py b/tests/pretty_format_json_test.py
index 7bfc31f..62e37f1 100644
--- a/tests/pretty_format_json_test.py
+++ b/tests/pretty_format_json_test.py
@@ -20,6 +20,7 @@
 @pytest.mark.parametrize(('filename', 'expected_retval'), (
     ('not_pretty_formatted_json.json', 1),
     ('unsorted_pretty_formatted_json.json', 1),
+    ('non_ascii_pretty_formatted_json.json', 1),
     ('pretty_formatted_json.json', 0),
 ))
 def test_pretty_format_json(filename, expected_retval):
@@ -30,6 +31,7 @@
 @pytest.mark.parametrize(('filename', 'expected_retval'), (
     ('not_pretty_formatted_json.json', 1),
     ('unsorted_pretty_formatted_json.json', 0),
+    ('non_ascii_pretty_formatted_json.json', 1),
     ('pretty_formatted_json.json', 0),
 ))
 def test_unsorted_pretty_format_json(filename, expected_retval):
@@ -40,6 +42,7 @@
 @pytest.mark.parametrize(('filename', 'expected_retval'), (
     ('not_pretty_formatted_json.json', 1),
     ('unsorted_pretty_formatted_json.json', 1),
+    ('non_ascii_pretty_formatted_json.json', 1),
     ('pretty_formatted_json.json', 1),
     ('tab_pretty_formatted_json.json', 0),
 ))
@@ -48,6 +51,11 @@
     assert ret == expected_retval
 
 
+def test_non_ascii_pretty_format_json():
+    ret = pretty_format_json(['--no-ensure-ascii', get_resource_path('non_ascii_pretty_formatted_json.json')])
+    assert ret == 0
+
+
 def test_autofix_pretty_format_json(tmpdir):
     srcfile = tmpdir.join('to_be_json_formatted.json')
     shutil.copyfile(