# Copyright (C) 2010 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

# Character classes to generate code for, keyed by class name.
#   "UseTable": when true (and tables are enabled on the command line) the
#               generated create function is handed a 65536-entry lookup table.
#   "Inverse":  names the class whose table is reused with the inverted flag
#               set, so no second table is emitted for the inverse class.
#   "data":     single characters / code points, or inclusive (first, last)
#               pairs; each bound may be a one-character string or an int.
types = {
    "wordchar": {
        "UseTable": True,
        "data": ['_', ('0', '9'), ('A', 'Z'), ('a', 'z')],
    },
    "nonwordchar": {
        "UseTable": True,
        "Inverse": "wordchar",
        "data": [
            '`',
            (0, ord('0') - 1),
            (ord('9') + 1, ord('A') - 1),
            (ord('Z') + 1, ord('_') - 1),
            (ord('z') + 1, 0xffff),
        ],
    },
    "newline": {
        "UseTable": False,
        "data": ['\n', '\r', 0x2028, 0x2029],
    },
    "spaces": {
        "UseTable": True,
        "data": [
            ' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029,
            0x202f, 0x205f, 0x3000, (0x2000, 0x200a), 0xfeff,
        ],
    },
    "nonspaces": {
        "UseTable": True,
        "Inverse": "spaces",
        "data": [
            (0, ord('\t') - 1),
            (ord('\r') + 1, ord(' ') - 1),
            (ord(' ') + 1, 0x009f),
            (0x00a1, 0x167f),
            (0x1681, 0x180d),
            (0x180f, 0x1fff),
            (0x200b, 0x2027),
            (0x202a, 0x202e),
            (0x2030, 0x205e),
            (0x2060, 0x2fff),
            (0x3001, 0xfefe),
            (0xff00, 0xffff),
        ],
    },
    "digits": {
        "UseTable": False,
        "data": [('0', '9')],
    },
    "nondigits": {
        "UseTable": False,
        "Inverse": "digits",
        "data": [(0, ord('0') - 1), (ord('9') + 1, 0xffff)],
    },
}

# Number of table entries written per line of generated source.
entriesPerLine = 50

# Tables are emitted unless the first command-line argument is "--no-tables".
emitTables = (len(sys.argv) < 2 or sys.argv[1] != "--no-tables")


def _normalizeRanges(data):
    """Return the entries of `data` as a sorted list of inclusive (low, high) ints.

    Single characters / code points become (c, c). A range that crosses the
    0x7f/0x80 boundary is split in two so that each piece lies entirely at or
    below 0x7f or entirely above it; the create function puts the two halves
    into different member lists.
    """
    ranges = []
    for entry in data:
        if isinstance(entry, str):
            code = ord(entry)
            ranges.append((code, code))
        elif isinstance(entry, int):
            ranges.append((entry, entry))
        else:
            low, high = entry
            if isinstance(low, str):
                low = ord(low)
            if isinstance(high, str):
                high = ord(high)
            if low <= 0x7f < high:
                ranges.append((low, 0x7f))
                low = 0x80
            ranges.append((low, high))
    ranges.sort()
    return ranges


def _tableSource(name, ranges):
    """Return the C source of the 65536-entry 0/1 lookup table for `name`.

    Entry N is 1 when code point N falls inside one of `ranges`, else 0.
    Entries are comma separated, `entriesPerLine` to a line.
    """
    flags = ["0"] * 65536
    for low, high in ranges:
        for code in range(low, high + 1):
            flags[code] = "1"
    rows = [
        ",".join(flags[start:start + entriesPerLine])
        for start in range(0, len(flags), entriesPerLine)
    ]
    # Every row but the last ends with a comma before its line break.
    return ("static const char _%sData[65536] = {\n" % name) + ",\n".join(rows) + "\n};\n\n"


def _createFunctionSource(name, classes, ranges, withTable):
    """Return the C++ source of the `<name>Create()` function.

    `withTable` selects between constructing the CharacterClass from a lookup
    table (the class's own, or the table of its "Inverse" class with the
    inverted flag set to true) and constructing it with 0.
    """
    lines = ["CharacterClass* %sCreate()\n" % name, "{\n"]
    if not withTable:
        lines.append(" CharacterClass* characterClass = new CharacterClass(0);\n")
    elif "Inverse" in classes:
        lines.append(" CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, true));\n" % (classes["Inverse"]))
    else:
        lines.append(" CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, false));\n" % (name))
    for low, high in ranges:
        if low == high:
            # Single code points go to m_matches / m_matchesUnicode.
            if low > 127:
                lines.append(" characterClass->m_matchesUnicode.append(0x%04x);\n" % low)
            else:
                lines.append(" characterClass->m_matches.append(0x%02x);\n" % low)
        elif low > 127 or high > 127:
            lines.append(" characterClass->m_rangesUnicode.append(CharacterRange(0x%04x, 0x%04x));\n" % (low, high))
        else:
            lines.append(" characterClass->m_ranges.append(CharacterRange(0x%02x, 0x%02x));\n" % (low, high))
    lines.append(" return characterClass;\n")
    lines.append("}\n\n")
    return "".join(lines)


# Build the generated source: `arrays` holds every lookup table, `functions`
# every create function. Tables are kept separate so they can all be written
# before the functions that refer to them.
_tableChunks = []
_functionChunks = []
for name, classes in types.items():
    ranges = _normalizeRanges(classes["data"])
    withTable = emitTables and classes["UseTable"]
    # Inverse classes reuse the table of the class they invert.
    if withTable and "Inverse" not in classes:
        _tableChunks.append(_tableSource(name, ranges))
    _functionChunks.append(_createFunctionSource(name, classes, ranges, withTable))
arrays = "".join(_tableChunks)
functions = "".join(_functionChunks)

# Write the generated tables followed by the create functions.
# The output path is the last command-line argument; the "--no-tables" flag
# is never treated as a path, so passing only the flag prints to stdout
# instead of creating a file named "--no-tables".
outputArgs = [arg for arg in sys.argv[1:] if arg != "--no-tables"]
if outputArgs:
    # `with` closes the file even if one of the writes raises.
    with open(outputArgs[-1], "w") as outputFile:
        outputFile.write(arrays)
        outputFile.write(functions)
else:
    print(arrays)
    print(functions)