| # Copyright (C) 2018 and later: Unicode, Inc. and others. |
| # License & terms of use: http://www.unicode.org/copyright.html |
| |
| # Python 2/3 Compatibility (ICU-20299) |
| # TODO(ICU-20301): Remove this. |
| from __future__ import print_function |
| |
| from icutools.databuilder import * |
| from icutools.databuilder import utils |
| from icutools.databuilder.request_types import * |
| |
| import os |
| import sys |
| |
| |
def generate(config, io, common_vars):
    """Assemble the full list of data build requests for the ICU data package.

    Exits the process with an error message when the source data directory
    cannot be found.

    Args:
        config: Build configuration object (pool-bundle flags, etc.).
        io: Filesystem abstraction used to enumerate input files.
        common_vars: Substitution variables shared by all requests.

    Returns:
        A list of request objects describing every data build step.
    """
    if len(io.glob("misc/*")) == 0:
        print("Error: Cannot find data directory; please specify --src_dir", file=sys.stderr)
        exit(1)

    requests = []

    # Stand-alone generators, invoked in the established order.
    for generator in (
            generate_cnvalias,
            generate_ulayout,
            generate_confusables,
            generate_conversion_mappings,
            generate_brkitr_brk,
            generate_stringprep,
            generate_brkitr_dictionaries,
            generate_normalization,
            generate_coll_ucadata,
            generate_full_unicore_data,
            generate_unames,
            generate_misc,
            generate_curr_supplemental,
            generate_zone_supplemental,
            generate_translit):
        requests += generator(config, io, common_vars)

    # Res Tree Files
    # Each spec: (input dirname, output dirname, use pool file, dep files).
    # Never use pool bundle for coll, brkitr, or rbnf.
    tree_specs = [
        ("locales", None, config.use_pool_bundle, []),
        ("curr", "curr", config.use_pool_bundle, []),
        ("lang", "lang", config.use_pool_bundle, []),
        ("region", "region", config.use_pool_bundle, []),
        ("zone", "zone", config.use_pool_bundle, []),
        ("unit", "unit", config.use_pool_bundle, []),
        # coll depends on timezoneTypes.res and keyTypeData.res.
        # TODO: We should not need this dependency to build collation.
        # TODO: Bake keyTypeData.res into the common library?
        ("coll", "coll", False,
            [DepTarget("coll_ucadata"), DepTarget("misc_res"), InFile("unidata/UCARules.txt")]),
        ("brkitr", "brkitr", False,
            [DepTarget("brkitr_brk"), DepTarget("dictionaries")]),
        ("rbnf", "rbnf", False, []),
    ]
    for in_sub_dir, out_sub_dir, use_pool, deps in tree_specs:
        requests += generate_tree(config, io, common_vars,
            in_sub_dir,
            out_sub_dir,
            use_pool,
            deps)

    # Final request: the list of all produced data files.
    requests.append(
        ListRequest(
            name = "icudata_list",
            variable_name = "icudata_all_output_files",
            output_file = TmpFile("icudata.lst"),
            include_tmp = False
        )
    )

    return requests
| |
| |
def generate_cnvalias(config, io, common_vars):
    """Build request for the UConv converter-name alias table (cnvalias.icu)."""
    return [
        SingleExecutionRequest(
            name = "cnvalias",
            category = "cnvalias",
            dep_targets = [],
            input_files = [InFile("mappings/convrtrs.txt")],
            output_files = [OutFile("cnvalias.icu")],
            tool = IcuTool("gencnval"),
            args = "-s {IN_DIR} -d {OUT_DIR} "
                "{INPUT_FILES[0]}",
            format_with = {}
        )
    ]
| |
| |
def generate_confusables(config, io, common_vars):
    """Build request compiling the spoof-checker confusables data (.cfu)."""
    single_script = InFile("unidata/confusables.txt")
    whole_script = InFile("unidata/confusablesWholeScript.txt")
    return [
        SingleExecutionRequest(
            name = "confusables",
            category = "confusables",
            dep_targets = [DepTarget("cnvalias")],
            input_files = [single_script, whole_script],
            output_files = [OutFile("confusables.cfu")],
            tool = IcuTool("gencfu"),
            args = "-d {OUT_DIR} -i {OUT_DIR} "
                "-c -r {IN_DIR}/{INPUT_FILES[0]} -w {IN_DIR}/{INPUT_FILES[1]} "
                "-o {OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]
| |
| |
def generate_conversion_mappings(config, io, common_vars):
    """Build request for UConv conversion tables (mappings/*.ucm -> *.cnv).

    Args:
        config: Build configuration (unused here).
        io: Filesystem abstraction used to glob the .ucm sources.
        common_vars: Shared substitution variables.

    Returns:
        A single-element list with the makeconv request.
    """
    input_files = [InFile(filename) for filename in io.glob("mappings/*.ucm")]
    # Strip the "mappings/" prefix (9 chars) and ".ucm" suffix (4 chars).
    output_files = [OutFile("%s.cnv" % v.filename[9:-4]) for v in input_files]
    # TODO: handle BUILD_SPECIAL_CNV_FILES? Means to add --ignore-siso-check flag to makeconv
    return [
        RepeatedOrSingleExecutionRequest(
            name = "conversion_mappings",
            category = "conversion_mappings",
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("makeconv"),
            args = "-s {IN_DIR} -d {OUT_DIR} -c {INPUT_FILE_PLACEHOLDER}",
            format_with = {},
            repeat_with = {
                # Loop variable named "f" (not "file") to avoid shadowing the
                # Python 2 builtin; this script still supports Python 2
                # (see ICU-20299 at the top of the file).
                "INPUT_FILE_PLACEHOLDER": utils.SpaceSeparatedList(f.filename for f in input_files)
            }
        )
    ]
| |
| |
def generate_brkitr_brk(config, io, common_vars):
    """Build requests compiling break-iterator rule files into .brk binaries."""
    rule_files = [InFile(p) for p in io.glob("brkitr/rules/*.txt")]
    # Strip "brkitr/rules/" (13 chars) and ".txt" (4 chars) from each path.
    compiled_files = [OutFile("brkitr/%s.brk" % src.filename[13:-4]) for src in rule_files]
    return [
        RepeatedExecutionRequest(
            name = "brkitr_brk",
            category = "brkitr_rules",
            dep_targets = [DepTarget("cnvalias"), DepTarget("ulayout")],
            input_files = rule_files,
            output_files = compiled_files,
            tool = IcuTool("genbrk"),
            args = "-d {OUT_DIR} -i {OUT_DIR} "
                "-c -r {IN_DIR}/{INPUT_FILE} "
                "-o {OUTPUT_FILE}",
            format_with = {},
            repeat_with = {}
        )
    ]
| |
| |
def generate_stringprep(config, io, common_vars):
    """Build requests compiling StringPrep profiles (sprep/*.txt -> *.spp)."""
    sources = [InFile(p) for p in io.glob("sprep/*.txt")]
    # Strip the "sprep/" prefix (6 chars) and ".txt" suffix (4 chars).
    stems = [src.filename[6:-4] for src in sources]
    compiled = [OutFile("%s.spp" % stem) for stem in stems]
    return [
        RepeatedExecutionRequest(
            name = "stringprep",
            category = "stringprep",
            dep_targets = [InFile("unidata/NormalizationCorrections.txt")],
            input_files = sources,
            output_files = compiled,
            tool = IcuTool("gensprep"),
            args = "-s {IN_DIR}/sprep -d {OUT_DIR} -i {OUT_DIR} "
                "-b {BUNDLE_NAME} -m {IN_DIR}/unidata -u 3.2.0 {BUNDLE_NAME}.txt",
            format_with = {},
            repeat_with = {
                "BUNDLE_NAME": stems
            }
        )
    ]
| |
| |
def generate_brkitr_dictionaries(config, io, common_vars):
    """Build requests compiling break-iterator dictionaries into .dict files."""
    # Per-dictionary gendict flags, keyed by source path; a newly added
    # dictionary file must get an entry here.
    options_by_path = {
        "brkitr/dictionaries/burmesedict.txt": "--bytes --transform offset-0x1000",
        "brkitr/dictionaries/cjdict.txt": "--uchars",
        "brkitr/dictionaries/khmerdict.txt": "--bytes --transform offset-0x1780",
        "brkitr/dictionaries/laodict.txt": "--bytes --transform offset-0x0e80",
        "brkitr/dictionaries/thaidict.txt": "--bytes --transform offset-0x0e00"
    }
    sources = [InFile(p) for p in io.glob("brkitr/dictionaries/*.txt")]
    # Strip "brkitr/dictionaries/" (20 chars) and ".txt" (4 chars).
    targets = [OutFile("brkitr/%s.dict" % src.filename[20:-4]) for src in sources]
    per_file_options = [options_by_path[src.filename] for src in sources]
    return [
        RepeatedExecutionRequest(
            name = "dictionaries",
            category = "brkitr_dictionaries",
            dep_targets = [],
            input_files = sources,
            output_files = targets,
            tool = IcuTool("gendict"),
            args = "-i {OUT_DIR} "
                "-c {EXTRA_OPTIONS} "
                "{IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
            format_with = {},
            repeat_with = {
                "EXTRA_OPTIONS": per_file_options
            }
        )
    ]
| |
| |
def generate_normalization(config, io, common_vars):
    """Build requests repackaging the pre-built .nrm normalization files."""
    sources = [InFile(p) for p in io.glob("in/*.nrm")]
    # nfc.nrm is pre-compiled into C++; see generate_full_unicore_data
    sources.remove(InFile("in/nfc.nrm"))
    # Drop the "in/" prefix (3 chars) for the output names.
    targets = [OutFile(src.filename[3:]) for src in sources]
    return [
        RepeatedExecutionRequest(
            name = "normalization",
            category = "normalization",
            dep_targets = [],
            input_files = sources,
            output_files = targets,
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
            format_with = {},
            repeat_with = {}
        )
    ]
| |
| |
def generate_coll_ucadata(config, io, common_vars):
    """Build request copying the collation UCA data file to coll/ucadata.icu."""
    # Which source variant is used depends on the configured Han ordering.
    source_path = "in/coll/ucadata-%s.icu" % config.coll_han_type
    return [
        SingleExecutionRequest(
            name = "coll_ucadata",
            category = "coll_ucadata",
            dep_targets = [],
            input_files = [InFile(source_path)],
            output_files = [OutFile("coll/ucadata.icu")],
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]
| |
| |
def generate_full_unicore_data(config, io, common_vars):
    """Build requests for the core Unicode property files, when enabled.

    Returns an empty list unless config.include_uni_core_data is set.
    """
    # The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu)
    # are hardcoded in the common DLL and therefore not included in the data package any more.
    # They are not built by default but need to be built for ICU4J data,
    # both in the .jar and in the .dat file (if ICU4J uses the .dat file).
    # See ICU-4497.
    if not config.include_uni_core_data:
        return []

    basenames = [
        "pnames.icu",
        "uprops.icu",
        "ucase.icu",
        "ubidi.icu",
        "nfc.nrm"
    ]
    input_files = [InFile("in/%s" % bn) for bn in basenames]
    output_files = [OutFile(bn) for bn in basenames]
    return [
        RepeatedExecutionRequest(
            name = "unicore",
            category = "unicore",
            # Pass the remaining fields explicitly, consistent with every
            # other RepeatedExecutionRequest constructed in this file.
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
            format_with = {},
            repeat_with = {}
        )
    ]
| |
| |
def generate_unames(config, io, common_vars):
    """Build request repackaging the Unicode character names file (unames.icu)."""
    return [
        SingleExecutionRequest(
            name = "unames",
            category = "unames",
            dep_targets = [],
            input_files = [InFile("in/unames.icu")],
            output_files = [OutFile("unames.icu")],
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]
| |
| |
def generate_ulayout(config, io, common_vars):
    """Build request repackaging the Unicode text layout properties file."""
    return [
        SingleExecutionRequest(
            name = "ulayout",
            category = "ulayout",
            dep_targets = [],
            input_files = [InFile("in/ulayout.icu")],
            output_files = [OutFile("ulayout.icu")],
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]
| |
| |
def generate_misc(config, io, common_vars):
    """Build requests compiling the miscellaneous data res files (misc/*.txt)."""
    sources = [InFile(p) for p in io.glob("misc/*.txt")]
    # Strip the "misc/" prefix (5 chars); genrb gets the .txt basename.
    basenames = [src.filename[5:] for src in sources]
    targets = [OutFile("%s.res" % bn[:-4]) for bn in basenames]
    return [
        RepeatedExecutionRequest(
            name = "misc_res",
            category = "misc",
            dep_targets = [DepTarget("cnvalias")], # ICU-21175
            input_files = sources,
            output_files = targets,
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/misc -d {OUT_DIR} -i {OUT_DIR} "
                "-k -q "
                "{INPUT_BASENAME}",
            format_with = {},
            repeat_with = {
                "INPUT_BASENAME": basenames
            }
        )
    ]
| |
| |
def generate_curr_supplemental(config, io, common_vars):
    """Build request for the currency supplementalData res file."""
    return [
        SingleExecutionRequest(
            name = "curr_supplemental_res",
            category = "curr_supplemental",
            dep_targets = [],
            input_files = [InFile("curr/supplementalData.txt")],
            output_files = [OutFile("curr/supplementalData.res")],
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/curr -d {OUT_DIR}/curr -i {OUT_DIR} "
                "-k "
                "{INPUT_BASENAME}",
            format_with = {
                "INPUT_BASENAME": "supplementalData.txt"
            }
        )
    ]
| |
| |
def generate_zone_supplemental(config, io, common_vars):
    """Build request for the tzdbNames res file."""
    return [
        SingleExecutionRequest(
            name = "zone_supplemental_res",
            category = "zone_supplemental",
            dep_targets = [],
            input_files = [InFile("zone/tzdbNames.txt")],
            output_files = [OutFile("zone/tzdbNames.res")],
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/zone -d {OUT_DIR}/zone -i {OUT_DIR} "
                "-k "
                "{INPUT_BASENAME}",
            format_with = {
                "INPUT_BASENAME": "tzdbNames.txt"
            }
        )
    ]
| |
| |
def generate_translit(config, io, common_vars):
    """Build request for the transliteration rule res files.

    Only root, en, and el are compiled directly; every other translit/*.txt
    source is tracked as a dependency so changes to it trigger a rebuild
    (presumably those files are referenced by the compiled rules — verify
    against genrb's include behavior).
    """
    compiled_sources = [
        InFile("translit/root.txt"),
        InFile("translit/en.txt"),
        InFile("translit/el.txt")
    ]
    dependency_files = sorted(
        set(InFile(p) for p in io.glob("translit/*.txt")) - set(compiled_sources)
    )
    # Strip the "translit/" prefix (9 chars); genrb gets the .txt basename.
    basenames = [src.filename[9:] for src in compiled_sources]
    targets = [OutFile("translit/%s.res" % bn[:-4]) for bn in basenames]
    return [
        RepeatedOrSingleExecutionRequest(
            name = "translit_res",
            category = "translit",
            dep_targets = dependency_files,
            input_files = compiled_sources,
            output_files = targets,
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/translit -d {OUT_DIR}/translit -i {OUT_DIR} "
                "-k "
                "{INPUT_BASENAME}",
            format_with = {
            },
            repeat_with = {
                "INPUT_BASENAME": utils.SpaceSeparatedList(basenames)
            }
        )
    ]
| |
| |
def generate_tree(
        config,
        io,
        common_vars,
        sub_dir,
        out_sub_dir,
        use_pool_bundle,
        dep_targets):
    """Generate requests for one resource-bundle tree (locales, curr, coll, ...).

    Produces, in order: an optional pool-bundle write request, the genrb
    requests compiling every .txt bundle in the tree, and a res_index request.

    Args:
        config: Build configuration (unused here; kept so all generators in
            this file share a uniform signature).
        io: Filesystem abstraction (glob, read_locale_deps).
        common_vars: Shared substitution variables (provides INDEX_NAME, etc.).
        sub_dir: Input subdirectory name, e.g. "locales" or "curr".
        out_sub_dir: Output subdirectory name, or None for top-level output.
        use_pool_bundle: Whether to write a shared pool.res for this tree.
        dep_targets: Extra dependency targets applied to the tree's requests.

    Returns:
        A list of request objects.
    """
    requests = []
    category = "%s_tree" % sub_dir
    out_prefix = "%s/" % out_sub_dir if out_sub_dir else ""
    input_files = [InFile(filename) for filename in io.glob("%s/*.txt" % sub_dir)]
    # supplementalData.txt and tzdbNames.txt are compiled by dedicated
    # generators (generate_curr_supplemental / generate_zone_supplemental),
    # so exclude them from the generic tree here.
    if sub_dir == "curr":
        input_files.remove(InFile("curr/supplementalData.txt"))
    if sub_dir == "zone":
        input_files.remove(InFile("zone/tzdbNames.txt"))
    # Strip the "<sub_dir>/" prefix; genrb gets the .txt basename.
    input_basenames = [v.filename[len(sub_dir)+1:] for v in input_files]
    output_files = [
        OutFile("%s%s.res" % (out_prefix, v[:-4]))
        for v in input_basenames
    ]

    # Generate Pool Bundle
    if use_pool_bundle:
        input_pool_files = [OutFile("%spool.res" % out_prefix)]
        pool_target_name = "%s_pool_write" % sub_dir
        use_pool_bundle_option = "--usePoolBundle {OUT_DIR}/{OUT_PREFIX}".format(
            OUT_PREFIX = out_prefix,
            **common_vars
        )
        requests += [
            SingleExecutionRequest(
                name = pool_target_name,
                category = category,
                dep_targets = dep_targets,
                input_files = input_files,
                output_files = input_pool_files,
                tool = IcuTool("genrb"),
                args = "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                    "--writePoolBundle -k "
                    "{INPUT_BASENAMES_SPACED}",
                format_with = {
                    "IN_SUB_DIR": sub_dir,
                    "OUT_PREFIX": out_prefix,
                    "INPUT_BASENAMES_SPACED": utils.SpaceSeparatedList(input_basenames)
                }
            ),
        ]
        # Every bundle in this tree must be built after the pool is written.
        dep_targets = dep_targets + [DepTarget(pool_target_name)]
    else:
        use_pool_bundle_option = ""

    # Generate Res File Tree
    requests += [
        RepeatedOrSingleExecutionRequest(
            name = "%s_res" % sub_dir,
            category = category,
            dep_targets = dep_targets,
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                "{EXTRA_OPTION} -k "
                "{INPUT_BASENAME}",
            format_with = {
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix,
                "EXTRA_OPTION": use_pool_bundle_option
            },
            repeat_with = {
                "INPUT_BASENAME": utils.SpaceSeparatedList(input_basenames)
            }
        )
    ]

    # Generate res_index file
    # Exclude the deprecated locale variants and root; see ICU-20628. This
    # could be data-driven, but we do not want to perform I/O in this script
    # (for example, we do not want to read from an XML file).
    excluded_locales = set([
        "ja_JP_TRADITIONAL",
        "th_TH_TRADITIONAL",
        "de_",
        "de__PHONEBOOK",
        "es_",
        "es__TRADITIONAL",
        "root",
    ])
    # Put alias locales in a separate structure; see ICU-20627
    dependency_data = io.read_locale_deps(sub_dir)
    if "aliases" in dependency_data:
        alias_locales = set(dependency_data["aliases"].keys())
    else:
        alias_locales = set()
    alias_files = []
    installed_files = []
    # Partition the inputs into alias vs. installed locales, dropping the
    # excluded ones entirely.
    for f in input_files:
        file_stem = IndexRequest.locale_file_stem(f)
        if file_stem in excluded_locales:
            continue
        destination = alias_files if file_stem in alias_locales else installed_files
        destination.append(f)
    # Only the "locales" tree records the CLDR version in its res_index.
    cldr_version = dependency_data["cldrVersion"] if sub_dir == "locales" else None
    index_file_txt = TmpFile("{IN_SUB_DIR}/{INDEX_NAME}.txt".format(
        IN_SUB_DIR = sub_dir,
        **common_vars
    ))
    index_res_file = OutFile("{OUT_PREFIX}{INDEX_NAME}.res".format(
        OUT_PREFIX = out_prefix,
        **common_vars
    ))
    index_file_target_name = "%s_index_txt" % sub_dir
    requests += [
        IndexRequest(
            name = index_file_target_name,
            category = category,
            installed_files = installed_files,
            alias_files = alias_files,
            txt_file = index_file_txt,
            output_file = index_res_file,
            cldr_version = cldr_version,
            args = "-s {TMP_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                "-k "
                "{INDEX_NAME}.txt",
            format_with = {
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix
            }
        )
    ]

    return requests