| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /****************************************************************************** |
| * Copyright (C) 2008-2012, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ******************************************************************************* |
| */ |
| #include "unicode/utypes.h" |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include "unicode/utypes.h" |
| #include "unicode/putil.h" |
| #include "cmemory.h" |
| #include "cstring.h" |
| #include "filestrm.h" |
| #include "toolutil.h" |
| #include "unicode/uclean.h" |
| #include "unewdata.h" |
| #include "putilimp.h" |
| #include "pkg_gencmn.h" |
| |
/* Capacity, in bytes, of the static stringStore[] arena that allocString() carves item names from. */
#define STRING_STORE_SIZE 200000

/* Package name used by createCommonDataFile() when the caller passes name==NULL. */
#define COMMON_DATA_NAME U_ICUDATA_NAME
/* Package type/extension used by createCommonDataFile() when the caller passes type==NULL. */
#define DATA_TYPE "dat"
| |
| /* ICU package data file format (.dat files) ------------------------------- *** |
| |
| Description of the data format after the usual ICU data file header |
| (UDataInfo etc.). |
| |
| Format version 1 |
| |
| A .dat package file contains a simple Table of Contents of item names, |
| followed by the items themselves: |
| |
| 1. ToC table |
| |
| uint32_t count; - number of items |
| UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item: |
| uint32_t nameOffset; - offset of the item name |
| uint32_t dataOffset; - offset of the item data |
| both are byte offsets from the beginning of the data |
| |
| 2. item name strings |
| |
| All item names are stored as char * strings in one block between the ToC table |
| and the data items. |
| |
| 3. data items |
| |
| The data items are stored following the item names block. |
| Each data item is 16-aligned. |
| The data items are stored in the sorted order of their names. |
| |
| Therefore, the top of the name strings block is the offset of the first item, |
| the length of the last item is the difference between its offset and |
| the .dat file length, and the length of all previous items is the difference |
| between its offset and the next one. |
| |
| ----------------------------------------------------------------------------- */ |
| |
| /* UDataInfo cf. udata.h */ |
| static const UDataInfo dataInfo={ |
| sizeof(UDataInfo), |
| 0, |
| |
| U_IS_BIG_ENDIAN, |
| U_CHARSET_FAMILY, |
| sizeof(UChar), |
| 0, |
| |
| {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ |
| {1, 0, 0, 0}, /* formatVersion */ |
| {3, 0, 0, 0} /* dataVersion */ |
| }; |
| |
| static uint32_t maxSize; |
| |
| static char stringStore[STRING_STORE_SIZE]; |
| static uint32_t stringTop=0, basenameTotal=0; |
| |
| typedef struct { |
| char *pathname, *basename; |
| uint32_t basenameLength, basenameOffset, fileSize, fileOffset; |
| } File; |
| |
| #define CHUNK_FILE_COUNT 256 |
| static File *files = NULL; |
| static uint32_t fileCount=0; |
| static uint32_t fileMax = 0; |
| |
| |
| static char *symPrefix = NULL; |
| |
| #define LINE_BUFFER_SIZE 512 |
| /* prototypes --------------------------------------------------------------- */ |
| |
| static void |
| addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose); |
| |
| static char * |
| allocString(uint32_t length); |
| |
| U_CDECL_BEGIN |
| static int |
| compareFiles(const void *file1, const void *file2); |
| U_CDECL_END |
| |
| static char * |
| pathToFullPath(const char *path, const char *source); |
| |
| /* map non-tree separator (such as '\') to tree separator ('/') inplace. */ |
| static void |
| fixDirToTreePath(char *s); |
| /* -------------------------------------------------------------------------- */ |
| |
| U_CAPI void U_EXPORT2 |
| createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight, |
| const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) { |
| static char buffer[4096]; |
| char *line; |
| char *linePtr; |
| char *s = NULL; |
| UErrorCode errorCode=U_ZERO_ERROR; |
| uint32_t i, fileOffset, basenameOffset, length, nread; |
| FileStream *in, *file; |
| |
| line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE); |
| if (line == NULL) { |
| fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE); |
| exit(U_MEMORY_ALLOCATION_ERROR); |
| } |
| |
| linePtr = line; |
| |
| maxSize = max_size; |
| |
| if (destDir == NULL) { |
| destDir = u_getDataDirectory(); |
| } |
| if (name == NULL) { |
| name = COMMON_DATA_NAME; |
| } |
| if (type == NULL) { |
| type = DATA_TYPE; |
| } |
| if (source == NULL) { |
| source = "."; |
| } |
| |
| if (dataFile == NULL) { |
| in = T_FileStream_stdin(); |
| } else { |
| in = T_FileStream_open(dataFile, "r"); |
| if(in == NULL) { |
| fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile); |
| exit(U_FILE_ACCESS_ERROR); |
| } |
| } |
| |
| if (verbose) { |
| if(sourceTOC) { |
| printf("generating %s_%s.c (table of contents source file)\n", name, type); |
| } else { |
| printf("generating %s.%s (common data file with table of contents)\n", name, type); |
| } |
| } |
| |
| /* read the list of files and get their lengths */ |
| while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr), |
| LINE_BUFFER_SIZE))!=NULL) { |
| /* remove trailing newline characters and parse space separated items */ |
| if (s != NULL && *s != 0) { |
| line=s; |
| } else { |
| s=line; |
| } |
| while(*s!=0) { |
| if(*s==' ') { |
| *s=0; |
| ++s; |
| break; |
| } else if(*s=='\r' || *s=='\n') { |
| *s=0; |
| break; |
| } |
| ++s; |
| } |
| |
| /* check for comment */ |
| |
| if (*line == '#') { |
| continue; |
| } |
| |
| /* add the file */ |
| #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) |
| { |
| char *t; |
| while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) { |
| *t = U_FILE_SEP_CHAR; |
| } |
| } |
| #endif |
| addFile(getLongPathname(line), name, source, sourceTOC, verbose); |
| } |
| |
| uprv_free(linePtr); |
| |
| if(in!=T_FileStream_stdin()) { |
| T_FileStream_close(in); |
| } |
| |
| if(fileCount==0) { |
| fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile); |
| return; |
| } |
| |
| /* sort the files by basename */ |
| qsort(files, fileCount, sizeof(File), compareFiles); |
| |
| if(!sourceTOC) { |
| UNewDataMemory *out; |
| |
| /* determine the offsets of all basenames and files in this common one */ |
| basenameOffset=4+8*fileCount; |
| fileOffset=(basenameOffset+(basenameTotal+15))&~0xf; |
| for(i=0; i<fileCount; ++i) { |
| files[i].fileOffset=fileOffset; |
| fileOffset+=(files[i].fileSize+15)&~0xf; |
| files[i].basenameOffset=basenameOffset; |
| basenameOffset+=files[i].basenameLength; |
| } |
| |
| /* create the output file */ |
| out=udata_create(destDir, type, name, |
| &dataInfo, |
| copyRight == NULL ? U_COPYRIGHT_STRING : copyRight, |
| &errorCode); |
| if(U_FAILURE(errorCode)) { |
| fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n", |
| destDir, name, type, |
| u_errorName(errorCode)); |
| exit(errorCode); |
| } |
| |
| /* write the table of contents */ |
| udata_write32(out, fileCount); |
| for(i=0; i<fileCount; ++i) { |
| udata_write32(out, files[i].basenameOffset); |
| udata_write32(out, files[i].fileOffset); |
| } |
| |
| /* write the basenames */ |
| for(i=0; i<fileCount; ++i) { |
| udata_writeString(out, files[i].basename, files[i].basenameLength); |
| } |
| length=4+8*fileCount+basenameTotal; |
| |
| /* copy the files */ |
| for(i=0; i<fileCount; ++i) { |
| /* pad to 16-align the next file */ |
| length&=0xf; |
| if(length!=0) { |
| udata_writePadding(out, 16-length); |
| } |
| |
| if (verbose) { |
| printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s"); |
| } |
| |
| /* copy the next file */ |
| file=T_FileStream_open(files[i].pathname, "rb"); |
| if(file==NULL) { |
| fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname); |
| exit(U_FILE_ACCESS_ERROR); |
| } |
| for(nread = 0;;) { |
| length=T_FileStream_read(file, buffer, sizeof(buffer)); |
| if(length <= 0) { |
| break; |
| } |
| nread += length; |
| udata_writeBlock(out, buffer, length); |
| } |
| T_FileStream_close(file); |
| length=files[i].fileSize; |
| |
| if (nread != files[i].fileSize) { |
| fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s"); |
| exit(U_FILE_ACCESS_ERROR); |
| } |
| } |
| |
| /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */ |
| length&=0xf; |
| if(length!=0) { |
| udata_writePadding(out, 16-length); |
| } |
| |
| /* finish */ |
| udata_finish(out, &errorCode); |
| if(U_FAILURE(errorCode)) { |
| fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode)); |
| exit(errorCode); |
| } |
| } else { |
| /* write a .c source file with the table of contents */ |
| char *filename; |
| FileStream *out; |
| |
| /* create the output filename */ |
| filename=s=buffer; |
| uprv_strcpy(filename, destDir); |
| s=filename+uprv_strlen(filename); |
| if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) { |
| *s++=U_FILE_SEP_CHAR; |
| } |
| uprv_strcpy(s, name); |
| if(*(type)!=0) { |
| s+=uprv_strlen(s); |
| *s++='_'; |
| uprv_strcpy(s, type); |
| } |
| s+=uprv_strlen(s); |
| uprv_strcpy(s, ".c"); |
| |
| /* open the output file */ |
| out=T_FileStream_open(filename, "w"); |
| if (gencmnFileName != NULL) { |
| uprv_strcpy(gencmnFileName, filename); |
| } |
| if(out==NULL) { |
| fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename); |
| exit(U_FILE_ACCESS_ERROR); |
| } |
| |
| /* write the source file */ |
| sprintf(buffer, |
| "/*\n" |
| " * ICU common data table of contents for %s.%s\n" |
| " * Automatically generated by icu/source/tools/gencmn/gencmn .\n" |
| " */\n\n" |
| "#include \"unicode/utypes.h\"\n" |
| "#include \"unicode/udata.h\"\n" |
| "\n" |
| "/* external symbol declarations for data (%d files) */\n", |
| name, type, fileCount); |
| T_FileStream_writeLine(out, buffer); |
| |
| sprintf(buffer, "extern const char\n %s%s[]", symPrefix?symPrefix:"", files[0].pathname); |
| T_FileStream_writeLine(out, buffer); |
| for(i=1; i<fileCount; ++i) { |
| sprintf(buffer, ",\n %s%s[]", symPrefix?symPrefix:"", files[i].pathname); |
| T_FileStream_writeLine(out, buffer); |
| } |
| T_FileStream_writeLine(out, ";\n\n"); |
| |
| sprintf( |
| buffer, |
| "U_EXPORT struct {\n" |
| " uint16_t headerSize;\n" |
| " uint8_t magic1, magic2;\n" |
| " UDataInfo info;\n" |
| " char padding[%lu];\n" |
| " uint32_t count, reserved;\n" |
| " struct {\n" |
| " const char *name;\n" |
| " const void *data;\n" |
| " } toc[%lu];\n" |
| "} U_EXPORT2 %s_dat = {\n" |
| " 32, 0xda, 0x27, {\n" |
| " %lu, 0,\n" |
| " %u, %u, %u, 0,\n" |
| " {0x54, 0x6f, 0x43, 0x50},\n" |
| " {1, 0, 0, 0},\n" |
| " {0, 0, 0, 0}\n" |
| " },\n" |
| " \"\", %lu, 0, {\n", |
| static_cast<unsigned long>(32-4-sizeof(UDataInfo)), |
| static_cast<unsigned long>(fileCount), |
| entrypointName, |
| static_cast<unsigned long>(sizeof(UDataInfo)), |
| U_IS_BIG_ENDIAN, |
| U_CHARSET_FAMILY, |
| U_SIZEOF_UCHAR, |
| static_cast<unsigned long>(fileCount) |
| ); |
| T_FileStream_writeLine(out, buffer); |
| |
| sprintf(buffer, " { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname); |
| T_FileStream_writeLine(out, buffer); |
| for(i=1; i<fileCount; ++i) { |
| sprintf(buffer, ",\n { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname); |
| T_FileStream_writeLine(out, buffer); |
| } |
| |
| T_FileStream_writeLine(out, "\n }\n};\n"); |
| T_FileStream_close(out); |
| |
| uprv_free(symPrefix); |
| } |
| } |
| |
| static void |
| addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) { |
| char *s; |
| uint32_t length; |
| char *fullPath = NULL; |
| |
| if(fileCount==fileMax) { |
| fileMax += CHUNK_FILE_COUNT; |
| files = (File *)uprv_realloc(files, fileMax*sizeof(files[0])); /* note: never freed. */ |
| if(files==NULL) { |
| fprintf(stderr, "pkgdata/gencmn: Could not allocate %u bytes for %d files\n", (unsigned int)(fileMax*sizeof(files[0])), fileCount); |
| exit(U_MEMORY_ALLOCATION_ERROR); |
| } |
| } |
| |
| if(!sourceTOC) { |
| FileStream *file; |
| |
| if(uprv_pathIsAbsolute(filename)) { |
| fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename); |
| exit(U_ILLEGAL_ARGUMENT_ERROR); |
| } |
| fullPath = pathToFullPath(filename, source); |
| /* store the pathname */ |
| length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1); |
| s=allocString(length); |
| uprv_strcpy(s, name); |
| uprv_strcat(s, U_TREE_ENTRY_SEP_STRING); |
| uprv_strcat(s, filename); |
| |
| /* get the basename */ |
| fixDirToTreePath(s); |
| files[fileCount].basename=s; |
| files[fileCount].basenameLength=length; |
| |
| files[fileCount].pathname=fullPath; |
| |
| basenameTotal+=length; |
| |
| /* try to open the file */ |
| file=T_FileStream_open(fullPath, "rb"); |
| if(file==NULL) { |
| fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath); |
| exit(U_FILE_ACCESS_ERROR); |
| } |
| |
| /* get the file length */ |
| length=T_FileStream_size(file); |
| if(T_FileStream_error(file) || length<=20) { |
| fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath); |
| exit(U_FILE_ACCESS_ERROR); |
| } |
| |
| T_FileStream_close(file); |
| |
| /* do not add files that are longer than maxSize */ |
| if(maxSize && length>maxSize) { |
| if (verbose) { |
| printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize); |
| } |
| return; |
| } |
| files[fileCount].fileSize=length; |
| } else { |
| char *t; |
| /* get and store the basename */ |
| /* need to include the package name */ |
| length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1); |
| s=allocString(length); |
| uprv_strcpy(s, name); |
| uprv_strcat(s, U_TREE_ENTRY_SEP_STRING); |
| uprv_strcat(s, filename); |
| fixDirToTreePath(s); |
| files[fileCount].basename=s; |
| /* turn the basename into an entry point name and store in the pathname field */ |
| t=files[fileCount].pathname=allocString(length); |
| while(--length>0) { |
| if(*s=='.' || *s=='-' || *s=='/') { |
| *t='_'; |
| } else { |
| *t=*s; |
| } |
| ++s; |
| ++t; |
| } |
| *t=0; |
| } |
| ++fileCount; |
| } |
| |
| static char * |
| allocString(uint32_t length) { |
| uint32_t top=stringTop+length; |
| char *p; |
| |
| if(top>STRING_STORE_SIZE) { |
| fprintf(stderr, "gencmn: out of memory\n"); |
| exit(U_MEMORY_ALLOCATION_ERROR); |
| } |
| p=stringStore+stringTop; |
| stringTop=top; |
| return p; |
| } |
| |
| static char * |
| pathToFullPath(const char *path, const char *source) { |
| int32_t length; |
| int32_t newLength; |
| char *fullPath; |
| int32_t n; |
| |
| length = (uint32_t)(uprv_strlen(path) + 1); |
| newLength = (length + 1 + (int32_t)uprv_strlen(source)); |
| fullPath = (char *)uprv_malloc(newLength); |
| if(source != NULL) { |
| uprv_strcpy(fullPath, source); |
| uprv_strcat(fullPath, U_FILE_SEP_STRING); |
| } else { |
| fullPath[0] = 0; |
| } |
| n = (int32_t)uprv_strlen(fullPath); |
| fullPath[n] = 0; /* Suppress compiler warning for unused variable n */ |
| /* when conditional code below is not compiled. */ |
| uprv_strcat(fullPath, path); |
| |
| #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) |
| #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) |
| /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */ |
| for(;fullPath[n];n++) { |
| if(fullPath[n] == U_FILE_ALT_SEP_CHAR) { |
| fullPath[n] = U_FILE_SEP_CHAR; |
| } |
| } |
| #endif |
| #endif |
| #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) |
| /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */ |
| for(;fullPath[n];n++) { |
| if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) { |
| fullPath[n] = U_FILE_SEP_CHAR; |
| } |
| } |
| #endif |
| return fullPath; |
| } |
| |
| U_CDECL_BEGIN |
| static int |
| compareFiles(const void *file1, const void *file2) { |
| /* sort by basename */ |
| return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename); |
| } |
| U_CDECL_END |
| |
/*
 * Rewrites s in place so that every platform file separator (and every
 * alternate file separator) becomes the tree entry separator. On platforms
 * where all of these are the same character nothing is compiled in and the
 * string is left untouched.
 */
static void
fixDirToTreePath(char *s)
{
    (void)s;
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *cursor;

    /* one walk over the string handles both kinds of separator */
    for(cursor=s; *cursor!=0; ++cursor) {
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
        if(*cursor == U_FILE_SEP_CHAR) {
            *cursor = U_TREE_ENTRY_SEP_CHAR;
            continue;
        }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
        if(*cursor == U_FILE_ALT_SEP_CHAR) {
            *cursor = U_TREE_ENTRY_SEP_CHAR;
        }
#endif
    }
#endif
}