|  | /* | 
|  | ******************************************************************************** | 
|  | * | 
|  | *   Copyright (C) 1998-2012, International Business Machines | 
|  | *   Corporation and others.  All Rights Reserved. | 
|  | * | 
|  | ******************************************************************************** | 
|  | * | 
|  | * | 
|  | *  makeconv.c: | 
|  | *  tool creating a binary (compressed) representation of the conversion mapping | 
|  | *  table (IBM NLTC ucmap format). | 
|  | * | 
|  | *  05/04/2000    helena     Added fallback mapping into the picture... | 
|  | *  06/29/2000  helena      Major rewrite of the callback APIs. | 
|  | */ | 
|  |  | 
|  | #include <stdio.h> | 
|  | #include "unicode/putil.h" | 
|  | #include "unicode/ucnv_err.h" | 
|  | #include "ucnv_bld.h" | 
|  | #include "ucnv_imp.h" | 
|  | #include "ucnv_cnv.h" | 
|  | #include "cstring.h" | 
|  | #include "cmemory.h" | 
|  | #include "uinvchar.h" | 
|  | #include "filestrm.h" | 
|  | #include "toolutil.h" | 
|  | #include "uoptions.h" | 
|  | #include "unicode/udata.h" | 
|  | #include "unewdata.h" | 
|  | #include "uparse.h" | 
|  | #include "ucm.h" | 
|  | #include "makeconv.h" | 
|  | #include "genmbcs.h" | 
|  |  | 
|  | #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | 
|  |  | 
|  | #define DEBUG 0 | 
|  |  | 
|  | typedef struct ConvData { | 
|  | UCMFile *ucm; | 
|  | NewConverter *cnvData, *extData; | 
|  | UConverterSharedData sharedData; | 
|  | UConverterStaticData staticData; | 
|  | } ConvData; | 
|  |  | 
|  | static void | 
|  | initConvData(ConvData *data) { | 
|  | uprv_memset(data, 0, sizeof(ConvData)); | 
|  | data->sharedData.structSize=sizeof(UConverterSharedData); | 
|  | data->staticData.structSize=sizeof(UConverterStaticData); | 
|  | data->sharedData.staticData=&data->staticData; | 
|  | } | 
|  |  | 
|  | static void | 
|  | cleanupConvData(ConvData *data) { | 
|  | if(data!=NULL) { | 
|  | if(data->cnvData!=NULL) { | 
|  | data->cnvData->close(data->cnvData); | 
|  | data->cnvData=NULL; | 
|  | } | 
|  | if(data->extData!=NULL) { | 
|  | data->extData->close(data->extData); | 
|  | data->extData=NULL; | 
|  | } | 
|  | ucm_close(data->ucm); | 
|  | data->ucm=NULL; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* | 
|  | * from ucnvstat.c - static prototypes of data-based converters | 
|  | */ | 
|  | extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]; | 
|  |  | 
|  | /* | 
|  | * Global - verbosity | 
|  | */ | 
|  | UBool VERBOSE = FALSE; | 
|  | UBool SMALL = FALSE; | 
|  | UBool IGNORE_SISO_CHECK = FALSE; | 
|  |  | 
|  | static void | 
|  | createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode); | 
|  |  | 
|  | /* | 
|  | * Set up the UNewData and write the converter.. | 
|  | */ | 
|  | static void | 
|  | writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status); | 
|  |  | 
|  | UBool haveCopyright=TRUE; | 
|  |  | 
|  | static UDataInfo dataInfo={ | 
|  | sizeof(UDataInfo), | 
|  | 0, | 
|  |  | 
|  | U_IS_BIG_ENDIAN, | 
|  | U_CHARSET_FAMILY, | 
|  | sizeof(UChar), | 
|  | 0, | 
|  |  | 
|  | {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */ | 
|  | {6, 2, 0, 0},                 /* formatVersion */ | 
|  | {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */ | 
|  | }; | 
|  |  | 
|  | static void | 
|  | writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status) | 
|  | { | 
|  | UNewDataMemory *mem = NULL; | 
|  | uint32_t sz2; | 
|  | uint32_t size = 0; | 
|  | int32_t tableType; | 
|  |  | 
|  | if(U_FAILURE(*status)) | 
|  | { | 
|  | return; | 
|  | } | 
|  |  | 
|  | tableType=TABLE_NONE; | 
|  | if(data->cnvData!=NULL) { | 
|  | tableType|=TABLE_BASE; | 
|  | } | 
|  | if(data->extData!=NULL) { | 
|  | tableType|=TABLE_EXT; | 
|  | } | 
|  |  | 
|  | mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status); | 
|  |  | 
|  | if(U_FAILURE(*status)) | 
|  | { | 
|  | fprintf(stderr, "Couldn't create the udata %s.%s: %s\n", | 
|  | cnvName, | 
|  | "cnv", | 
|  | u_errorName(*status)); | 
|  | return; | 
|  | } | 
|  |  | 
|  | if(VERBOSE) | 
|  | { | 
|  | printf("- Opened udata %s.%s\n", cnvName, "cnv"); | 
|  | } | 
|  |  | 
|  |  | 
|  | /* all read only, clean, platform independent data.  Mmmm. :)  */ | 
|  | udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData)); | 
|  | size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */ | 
|  | /* Now, write the table */ | 
|  | if(tableType&TABLE_BASE) { | 
|  | size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType); | 
|  | } | 
|  | if(tableType&TABLE_EXT) { | 
|  | size += data->extData->write(data->extData, &data->staticData, mem, tableType); | 
|  | } | 
|  |  | 
|  | sz2 = udata_finish(mem, status); | 
|  | if(size != sz2) | 
|  | { | 
|  | fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size); | 
|  | *status=U_INTERNAL_PROGRAM_ERROR; | 
|  | } | 
|  | if(VERBOSE) | 
|  | { | 
|  | printf("- Wrote %u bytes to the udata.\n", (int)sz2); | 
|  | } | 
|  | } | 
|  |  | 
|  | enum { | 
|  | OPT_HELP_H, | 
|  | OPT_HELP_QUESTION_MARK, | 
|  | OPT_COPYRIGHT, | 
|  | OPT_VERSION, | 
|  | OPT_DESTDIR, | 
|  | OPT_VERBOSE, | 
|  | OPT_SMALL, | 
|  | OPT_IGNORE_SISO_CHECK, | 
|  | OPT_COUNT | 
|  | }; | 
|  |  | 
|  | static UOption options[]={ | 
|  | UOPTION_HELP_H, | 
|  | UOPTION_HELP_QUESTION_MARK, | 
|  | UOPTION_COPYRIGHT, | 
|  | UOPTION_VERSION, | 
|  | UOPTION_DESTDIR, | 
|  | UOPTION_VERBOSE, | 
|  | { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }, | 
|  | { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 } | 
|  | }; | 
|  |  | 
|  | int main(int argc, char* argv[]) | 
|  | { | 
|  | ConvData data; | 
|  | UErrorCode err = U_ZERO_ERROR, localError; | 
|  | char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; | 
|  | const char* destdir, *arg; | 
|  | size_t destdirlen; | 
|  | char* dot = NULL, *outBasename; | 
|  | char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; | 
|  | char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH]; | 
|  | UVersionInfo icuVersion; | 
|  | UBool printFilename; | 
|  |  | 
|  | err = U_ZERO_ERROR; | 
|  |  | 
|  | U_MAIN_INIT_ARGS(argc, argv); | 
|  |  | 
|  | /* Set up the ICU version number */ | 
|  | u_getVersion(icuVersion); | 
|  | uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo)); | 
|  |  | 
|  | /* preset then read command line options */ | 
|  | options[OPT_DESTDIR].value=u_getDataDirectory(); | 
|  | argc=u_parseArgs(argc, argv, LENGTHOF(options), options); | 
|  |  | 
|  | /* error handling, printing usage message */ | 
|  | if(argc<0) { | 
|  | fprintf(stderr, | 
|  | "error in command line argument \"%s\"\n", | 
|  | argv[-argc]); | 
|  | } else if(argc<2) { | 
|  | argc=-1; | 
|  | } | 
|  | if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) { | 
|  | FILE *stdfile=argc<0 ? stderr : stdout; | 
|  | fprintf(stdfile, | 
|  | "usage: %s [-options] files...\n" | 
|  | "\tread .ucm codepage mapping files and write .cnv files\n" | 
|  | "options:\n" | 
|  | "\t-h or -? or --help  this usage text\n" | 
|  | "\t-V or --version     show a version message\n" | 
|  | "\t-c or --copyright   include a copyright notice\n" | 
|  | "\t-d or --destdir     destination directory, followed by the path\n" | 
|  | "\t-v or --verbose     Turn on verbose output\n", | 
|  | argv[0]); | 
|  | fprintf(stdfile, | 
|  | "\t      --small       Generate smaller .cnv files. They will be\n" | 
|  | "\t                    significantly smaller but may not be compatible with\n" | 
|  | "\t                    older versions of ICU and will require heap memory\n" | 
|  | "\t                    allocation when loaded.\n" | 
|  | "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n"); | 
|  | return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; | 
|  | } | 
|  |  | 
|  | if(options[OPT_VERSION].doesOccur) { | 
|  | printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n", | 
|  | dataInfo.formatVersion[0], dataInfo.formatVersion[1]); | 
|  | printf("%s\n", U_COPYRIGHT_STRING); | 
|  | exit(0); | 
|  | } | 
|  |  | 
|  | /* get the options values */ | 
|  | haveCopyright = options[OPT_COPYRIGHT].doesOccur; | 
|  | destdir = options[OPT_DESTDIR].value; | 
|  | VERBOSE = options[OPT_VERBOSE].doesOccur; | 
|  | SMALL = options[OPT_SMALL].doesOccur; | 
|  |  | 
|  | if (options[OPT_IGNORE_SISO_CHECK].doesOccur) { | 
|  | IGNORE_SISO_CHECK = TRUE; | 
|  | } | 
|  |  | 
|  | if (destdir != NULL && *destdir != 0) { | 
|  | uprv_strcpy(outFileName, destdir); | 
|  | destdirlen = uprv_strlen(destdir); | 
|  | outBasename = outFileName + destdirlen; | 
|  | if (*(outBasename - 1) != U_FILE_SEP_CHAR) { | 
|  | *outBasename++ = U_FILE_SEP_CHAR; | 
|  | ++destdirlen; | 
|  | } | 
|  | } else { | 
|  | destdirlen = 0; | 
|  | outBasename = outFileName; | 
|  | } | 
|  |  | 
|  | #if DEBUG | 
|  | { | 
|  | int i; | 
|  | printf("makeconv: processing %d files...\n", argc - 1); | 
|  | for(i=1; i<argc; ++i) { | 
|  | printf("%s ", argv[i]); | 
|  | } | 
|  | printf("\n"); | 
|  | fflush(stdout); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | err = U_ZERO_ERROR; | 
|  | printFilename = (UBool) (argc > 2 || VERBOSE); | 
|  | for (++argv; --argc; ++argv) | 
|  | { | 
|  | arg = getLongPathname(*argv); | 
|  |  | 
|  | /* Check for potential buffer overflow */ | 
|  | if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH) | 
|  | { | 
|  | fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR)); | 
|  | return U_BUFFER_OVERFLOW_ERROR; | 
|  | } | 
|  |  | 
|  | /*produces the right destination path for display*/ | 
|  | if (destdirlen != 0) | 
|  | { | 
|  | const char *basename; | 
|  |  | 
|  | /* find the last file sepator */ | 
|  | basename = findBasename(arg); | 
|  | uprv_strcpy(outBasename, basename); | 
|  | } | 
|  | else | 
|  | { | 
|  | uprv_strcpy(outFileName, arg); | 
|  | } | 
|  |  | 
|  | /*removes the extension if any is found*/ | 
|  | dot = uprv_strrchr(outBasename, '.'); | 
|  | if (dot) | 
|  | { | 
|  | *dot = '\0'; | 
|  | } | 
|  |  | 
|  | /* the basename without extension is the converter name */ | 
|  | uprv_strcpy(cnvName, outBasename); | 
|  |  | 
|  | /*Adds the target extension*/ | 
|  | uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION); | 
|  |  | 
|  | #if DEBUG | 
|  | printf("makeconv: processing %s  ...\n", arg); | 
|  | fflush(stdout); | 
|  | #endif | 
|  | localError = U_ZERO_ERROR; | 
|  | initConvData(&data); | 
|  | createConverter(&data, arg, &localError); | 
|  |  | 
|  | if (U_FAILURE(localError)) | 
|  | { | 
|  | /* if an error is found, print out an error msg and keep going */ | 
|  | fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg, | 
|  | u_errorName(localError)); | 
|  | if(U_SUCCESS(err)) { | 
|  | err = localError; | 
|  | } | 
|  | } | 
|  | else | 
|  | { | 
|  | /* Insure the static data name matches the  file name */ | 
|  | /* Changed to ignore directory and only compare base name | 
|  | LDH 1/2/08*/ | 
|  | char *p; | 
|  | p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */ | 
|  |  | 
|  | if(p == NULL)            /* OK, try alternate */ | 
|  | { | 
|  | p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR); | 
|  | if(p == NULL) | 
|  | { | 
|  | p=cnvName; /* If no separators, no problem */ | 
|  | } | 
|  | } | 
|  | else | 
|  | { | 
|  | p++;   /* If found separtor, don't include it in compare */ | 
|  | } | 
|  | if(uprv_stricmp(p,data.staticData.name)) | 
|  | { | 
|  | fprintf(stderr, "Warning: %s%s claims to be '%s'\n", | 
|  | cnvName,  CONVERTER_FILE_EXTENSION, | 
|  | data.staticData.name); | 
|  | } | 
|  |  | 
|  | uprv_strcpy((char*)data.staticData.name, cnvName); | 
|  |  | 
|  | if(!uprv_isInvariantString((char*)data.staticData.name, -1)) { | 
|  | fprintf(stderr, | 
|  | "Error: A converter name must contain only invariant characters.\n" | 
|  | "%s is not a valid converter name.\n", | 
|  | data.staticData.name); | 
|  | if(U_SUCCESS(err)) { | 
|  | err = U_INVALID_TABLE_FORMAT; | 
|  | } | 
|  | } | 
|  |  | 
|  | uprv_strcpy(cnvNameWithPkg, cnvName); | 
|  |  | 
|  | localError = U_ZERO_ERROR; | 
|  | writeConverterData(&data, cnvNameWithPkg, destdir, &localError); | 
|  |  | 
|  | if(U_FAILURE(localError)) | 
|  | { | 
|  | /* if an error is found, print out an error msg and keep going*/ | 
|  | fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg, | 
|  | u_errorName(localError)); | 
|  | if(U_SUCCESS(err)) { | 
|  | err = localError; | 
|  | } | 
|  | } | 
|  | else if (printFilename) | 
|  | { | 
|  | puts(outBasename); | 
|  | } | 
|  | } | 
|  | fflush(stdout); | 
|  | fflush(stderr); | 
|  |  | 
|  | cleanupConvData(&data); | 
|  | } | 
|  |  | 
|  | return err; | 
|  | } | 
|  |  | 
|  | static void | 
|  | getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) { | 
|  | if( (name[0]=='i' || name[0]=='I') && | 
|  | (name[1]=='b' || name[1]=='B') && | 
|  | (name[2]=='m' || name[2]=='M') | 
|  | ) { | 
|  | name+=3; | 
|  | if(*name=='-') { | 
|  | ++name; | 
|  | } | 
|  | *pPlatform=UCNV_IBM; | 
|  | *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10); | 
|  | } else { | 
|  | *pPlatform=UCNV_UNKNOWN; | 
|  | *pCCSID=0; | 
|  | } | 
|  | } | 
|  |  | 
|  | static void | 
|  | readHeader(ConvData *data, | 
|  | FileStream* convFile, | 
|  | const char* converterName, | 
|  | UErrorCode *pErrorCode) { | 
|  | char line[1024]; | 
|  | char *s, *key, *value; | 
|  | const UConverterStaticData *prototype; | 
|  | UConverterStaticData *staticData; | 
|  |  | 
|  | if(U_FAILURE(*pErrorCode)) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | staticData=&data->staticData; | 
|  | staticData->platform=UCNV_IBM; | 
|  | staticData->subCharLen=0; | 
|  |  | 
|  | while(T_FileStream_readLine(convFile, line, sizeof(line))) { | 
|  | /* basic parsing and handling of state-related items */ | 
|  | if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) { | 
|  | continue; | 
|  | } | 
|  |  | 
|  | /* stop at the beginning of the mapping section */ | 
|  | if(uprv_strcmp(line, "CHARMAP")==0) { | 
|  | break; | 
|  | } | 
|  |  | 
|  | /* collect the information from the header field, ignore unknown keys */ | 
|  | if(uprv_strcmp(key, "code_set_name")==0) { | 
|  | if(*value!=0) { | 
|  | uprv_strcpy((char *)staticData->name, value); | 
|  | getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage); | 
|  | } | 
|  | } else if(uprv_strcmp(key, "subchar")==0) { | 
|  | uint8_t bytes[UCNV_EXT_MAX_BYTES]; | 
|  | int8_t length; | 
|  |  | 
|  | s=value; | 
|  | length=ucm_parseBytes(bytes, line, (const char **)&s); | 
|  | if(1<=length && length<=4 && *s==0) { | 
|  | staticData->subCharLen=length; | 
|  | uprv_memcpy(staticData->subChar, bytes, length); | 
|  | } else { | 
|  | fprintf(stderr, "error: illegal <subchar> %s\n", value); | 
|  | *pErrorCode=U_INVALID_TABLE_FORMAT; | 
|  | return; | 
|  | } | 
|  | } else if(uprv_strcmp(key, "subchar1")==0) { | 
|  | uint8_t bytes[UCNV_EXT_MAX_BYTES]; | 
|  |  | 
|  | s=value; | 
|  | if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) { | 
|  | staticData->subChar1=bytes[0]; | 
|  | } else { | 
|  | fprintf(stderr, "error: illegal <subchar1> %s\n", value); | 
|  | *pErrorCode=U_INVALID_TABLE_FORMAT; | 
|  | return; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /* copy values from the UCMFile to the static data */ | 
|  | staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength; | 
|  | staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength; | 
|  | staticData->conversionType=data->ucm->states.conversionType; | 
|  |  | 
|  | if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) { | 
|  | fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n"); | 
|  | *pErrorCode=U_INVALID_TABLE_FORMAT; | 
|  | return; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Now that we know the type, copy any 'default' values from the table. | 
|  | * We need not check the type any further because the parser only | 
|  | * recognizes what we have prototypes for. | 
|  | * | 
|  | * For delta (extension-only) tables, copy values from the base file | 
|  | * instead, see createConverter(). | 
|  | */ | 
|  | if(data->ucm->baseName[0]==0) { | 
|  | prototype=ucnv_converterStaticData[staticData->conversionType]; | 
|  | if(prototype!=NULL) { | 
|  | if(staticData->name[0]==0) { | 
|  | uprv_strcpy((char *)staticData->name, prototype->name); | 
|  | } | 
|  |  | 
|  | if(staticData->codepage==0) { | 
|  | staticData->codepage=prototype->codepage; | 
|  | } | 
|  |  | 
|  | if(staticData->platform==0) { | 
|  | staticData->platform=prototype->platform; | 
|  | } | 
|  |  | 
|  | if(staticData->minBytesPerChar==0) { | 
|  | staticData->minBytesPerChar=prototype->minBytesPerChar; | 
|  | } | 
|  |  | 
|  | if(staticData->maxBytesPerChar==0) { | 
|  | staticData->maxBytesPerChar=prototype->maxBytesPerChar; | 
|  | } | 
|  |  | 
|  | if(staticData->subCharLen==0) { | 
|  | staticData->subCharLen=prototype->subCharLen; | 
|  | if(prototype->subCharLen>0) { | 
|  | uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | if(data->ucm->states.outputType<0) { | 
|  | data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1; | 
|  | } | 
|  |  | 
|  | if( staticData->subChar1!=0 && | 
|  | (staticData->minBytesPerChar>1 || | 
|  | (staticData->conversionType!=UCNV_MBCS && | 
|  | staticData->conversionType!=UCNV_EBCDIC_STATEFUL)) | 
|  | ) { | 
|  | fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n"); | 
|  | *pErrorCode=U_INVALID_TABLE_FORMAT; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* return TRUE if a base table was read, FALSE for an extension table */ | 
|  | static UBool | 
|  | readFile(ConvData *data, const char* converterName, | 
|  | UErrorCode *pErrorCode) { | 
|  | char line[1024]; | 
|  | char *end; | 
|  | FileStream *convFile; | 
|  |  | 
|  | UCMStates *baseStates; | 
|  | UBool dataIsBase; | 
|  |  | 
|  | if(U_FAILURE(*pErrorCode)) { | 
|  | return FALSE; | 
|  | } | 
|  |  | 
|  | data->ucm=ucm_open(); | 
|  |  | 
|  | convFile=T_FileStream_open(converterName, "r"); | 
|  | if(convFile==NULL) { | 
|  | *pErrorCode=U_FILE_ACCESS_ERROR; | 
|  | return FALSE; | 
|  | } | 
|  |  | 
|  | readHeader(data, convFile, converterName, pErrorCode); | 
|  | if(U_FAILURE(*pErrorCode)) { | 
|  | return FALSE; | 
|  | } | 
|  |  | 
|  | if(data->ucm->baseName[0]==0) { | 
|  | dataIsBase=TRUE; | 
|  | baseStates=&data->ucm->states; | 
|  | ucm_processStates(baseStates, IGNORE_SISO_CHECK); | 
|  | } else { | 
|  | dataIsBase=FALSE; | 
|  | baseStates=NULL; | 
|  | } | 
|  |  | 
|  | /* read the base table */ | 
|  | ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode); | 
|  | if(U_FAILURE(*pErrorCode)) { | 
|  | return FALSE; | 
|  | } | 
|  |  | 
|  | /* read an extension table if there is one */ | 
|  | while(T_FileStream_readLine(convFile, line, sizeof(line))) { | 
|  | end=uprv_strchr(line, 0); | 
|  | while(line<end && | 
|  | (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) { | 
|  | --end; | 
|  | } | 
|  | *end=0; | 
|  |  | 
|  | if(line[0]=='#' || u_skipWhitespace(line)==end) { | 
|  | continue; /* ignore empty and comment lines */ | 
|  | } | 
|  |  | 
|  | if(0==uprv_strcmp(line, "CHARMAP")) { | 
|  | /* read the extension table */ | 
|  | ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode); | 
|  | } else { | 
|  | fprintf(stderr, "unexpected text after the base mapping table\n"); | 
|  | } | 
|  | break; | 
|  | } | 
|  |  | 
|  | T_FileStream_close(convFile); | 
|  |  | 
|  | if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) { | 
|  | fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n"); | 
|  | *pErrorCode=U_INVALID_TABLE_FORMAT; | 
|  | } | 
|  |  | 
|  | return dataIsBase; | 
|  | } | 
|  |  | 
|  | static void | 
|  | createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) { | 
|  | ConvData baseData; | 
|  | UBool dataIsBase; | 
|  |  | 
|  | UConverterStaticData *staticData; | 
|  | UCMStates *states, *baseStates; | 
|  |  | 
|  | if(U_FAILURE(*pErrorCode)) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | initConvData(data); | 
|  |  | 
|  | dataIsBase=readFile(data, converterName, pErrorCode); | 
|  | if(U_FAILURE(*pErrorCode)) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | staticData=&data->staticData; | 
|  | states=&data->ucm->states; | 
|  |  | 
|  | if(dataIsBase) { | 
|  | /* | 
|  | * Build a normal .cnv file with a base table | 
|  | * and an optional extension table. | 
|  | */ | 
|  | data->cnvData=MBCSOpen(data->ucm); | 
|  | if(data->cnvData==NULL) { | 
|  | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | 
|  |  | 
|  | } else if(!data->cnvData->isValid(data->cnvData, | 
|  | staticData->subChar, staticData->subCharLen) | 
|  | ) { | 
|  | fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n"); | 
|  | *pErrorCode=U_INVALID_TABLE_FORMAT; | 
|  |  | 
|  | } else if(staticData->subChar1!=0 && | 
|  | !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1) | 
|  | ) { | 
|  | fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n"); | 
|  | *pErrorCode=U_INVALID_TABLE_FORMAT; | 
|  |  | 
|  | } else if( | 
|  | data->ucm->ext->mappingsLength>0 && | 
|  | !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE) | 
|  | ) { | 
|  | *pErrorCode=U_INVALID_TABLE_FORMAT; | 
|  | } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) { | 
|  | /* sort the table so that it can be turned into UTF-8-friendly data */ | 
|  | ucm_sortTable(data->ucm->base); | 
|  | } | 
|  |  | 
|  | if(U_SUCCESS(*pErrorCode)) { | 
|  | if( | 
|  | /* add the base table after ucm_checkBaseExt()! */ | 
|  | !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData) | 
|  | ) { | 
|  | *pErrorCode=U_INVALID_TABLE_FORMAT; | 
|  | } else { | 
|  | /* | 
|  | * addTable() may have requested moving more mappings to the extension table | 
|  | * if they fit into the base toUnicode table but not into the | 
|  | * base fromUnicode table. | 
|  | * (Especially for UTF-8-friendly fromUnicode tables.) | 
|  | * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them | 
|  | * to be excluded from the extension toUnicode data. | 
|  | * See MBCSOkForBaseFromUnicode() for which mappings do not fit into | 
|  | * the base fromUnicode table. | 
|  | */ | 
|  | ucm_moveMappings(data->ucm->base, data->ucm->ext); | 
|  | ucm_sortTable(data->ucm->ext); | 
|  | if(data->ucm->ext->mappingsLength>0) { | 
|  | /* prepare the extension table, if there is one */ | 
|  | data->extData=CnvExtOpen(data->ucm); | 
|  | if(data->extData==NULL) { | 
|  | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | 
|  | } else if( | 
|  | !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData) | 
|  | ) { | 
|  | *pErrorCode=U_INVALID_TABLE_FORMAT; | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | } else { | 
|  | /* Build an extension-only .cnv file. */ | 
|  | char baseFilename[500]; | 
|  | char *basename; | 
|  |  | 
|  | initConvData(&baseData); | 
|  |  | 
|  | /* assemble a path/filename for data->ucm->baseName */ | 
|  | uprv_strcpy(baseFilename, converterName); | 
|  | basename=(char *)findBasename(baseFilename); | 
|  | uprv_strcpy(basename, data->ucm->baseName); | 
|  | uprv_strcat(basename, ".ucm"); | 
|  |  | 
|  | /* read the base table */ | 
|  | dataIsBase=readFile(&baseData, baseFilename, pErrorCode); | 
|  | if(U_FAILURE(*pErrorCode)) { | 
|  | return; | 
|  | } else if(!dataIsBase) { | 
|  | fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename); | 
|  | *pErrorCode=U_INVALID_TABLE_FORMAT; | 
|  | } else { | 
|  | /* prepare the extension table */ | 
|  | data->extData=CnvExtOpen(data->ucm); | 
|  | if(data->extData==NULL) { | 
|  | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | 
|  | } else { | 
|  | /* fill in gaps in extension file header fields */ | 
|  | UCMapping *m, *mLimit; | 
|  | uint8_t fallbackFlags; | 
|  |  | 
|  | baseStates=&baseData.ucm->states; | 
|  | if(states->conversionType==UCNV_DBCS) { | 
|  | staticData->minBytesPerChar=(int8_t)(states->minCharLength=2); | 
|  | } else if(states->minCharLength==0) { | 
|  | staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength); | 
|  | } | 
|  | if(states->maxCharLength<states->minCharLength) { | 
|  | staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength); | 
|  | } | 
|  |  | 
|  | if(staticData->subCharLen==0) { | 
|  | uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4); | 
|  | staticData->subCharLen=baseData.staticData.subCharLen; | 
|  | } | 
|  | /* | 
|  | * do not copy subChar1 - | 
|  | * only use what is explicitly specified | 
|  | * because it cannot be unset in the extension file header | 
|  | */ | 
|  |  | 
|  | /* get the fallback flags */ | 
|  | fallbackFlags=0; | 
|  | for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; | 
|  | m<mLimit && fallbackFlags!=3; | 
|  | ++m | 
|  | ) { | 
|  | if(m->f==1) { | 
|  | fallbackFlags|=1; | 
|  | } else if(m->f==3) { | 
|  | fallbackFlags|=2; | 
|  | } | 
|  | } | 
|  |  | 
|  | if(fallbackFlags&1) { | 
|  | staticData->hasFromUnicodeFallback=TRUE; | 
|  | } | 
|  | if(fallbackFlags&2) { | 
|  | staticData->hasToUnicodeFallback=TRUE; | 
|  | } | 
|  |  | 
|  | if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) { | 
|  | fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n"); | 
|  | *pErrorCode=U_INVALID_TABLE_FORMAT; | 
|  |  | 
|  | } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) { | 
|  | fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n"); | 
|  | *pErrorCode=U_INVALID_TABLE_FORMAT; | 
|  |  | 
|  | } else if( | 
|  | !ucm_checkValidity(data->ucm->ext, baseStates) || | 
|  | !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE) | 
|  | ) { | 
|  | *pErrorCode=U_INVALID_TABLE_FORMAT; | 
|  | } else { | 
|  | if(states->maxCharLength>1) { | 
|  | /* | 
|  | * When building a normal .cnv file with a base table | 
|  | * for an MBCS (not SBCS) table with explicit precision flags, | 
|  | * the MBCSAddTable() function marks some mappings for moving | 
|  | * to the extension table. | 
|  | * They fit into the base toUnicode table but not into the | 
|  | * base fromUnicode table. | 
|  | * (Note: We do have explicit precision flags because they are | 
|  | * required for extension table generation, and | 
|  | * ucm_checkBaseExt() verified it.) | 
|  | * | 
|  | * We do not call MBCSAddTable() here (we probably could) | 
|  | * so we need to do the analysis before building the extension table. | 
|  | * We assume that MBCSAddTable() will build a UTF-8-friendly table. | 
|  | * Redundant mappings in the extension table are ok except they cost some size. | 
|  | * | 
|  | * Do this after ucm_checkBaseExt(). | 
|  | */ | 
|  | const MBCSData *mbcsData=MBCSGetDummy(); | 
|  | int32_t needsMove=0; | 
|  | for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength; | 
|  | m<mLimit; | 
|  | ++m | 
|  | ) { | 
|  | if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) { | 
|  | m->f|=MBCS_FROM_U_EXT_FLAG; | 
|  | m->moveFlag=UCM_MOVE_TO_EXT; | 
|  | ++needsMove; | 
|  | } | 
|  | } | 
|  |  | 
|  | if(needsMove!=0) { | 
|  | ucm_moveMappings(baseData.ucm->base, data->ucm->ext); | 
|  | ucm_sortTable(data->ucm->ext); | 
|  | } | 
|  | } | 
|  | if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) { | 
|  | *pErrorCode=U_INVALID_TABLE_FORMAT; | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | cleanupConvData(&baseData); | 
|  | } | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Hey, Emacs, please set the following: | 
|  | * | 
|  | * Local Variables: | 
|  | * indent-tabs-mode: nil | 
|  | * End: | 
|  | * | 
|  | */ |