| /* |
| ****************************************************************************** |
| * |
| * Copyright (C) 1998-2014, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| ****************************************************************************** |
| * |
| * File ustdio.c |
| * |
| * Modification History: |
| * |
| * Date Name Description |
| * 11/18/98 stephen Creation. |
| * 03/12/99 stephen Modified for new C API. |
| * 07/19/99 stephen Fixed read() and gets() |
| ****************************************************************************** |
| */ |
| |
| #include "unicode/ustdio.h" |
| |
| #if !UCONFIG_NO_CONVERSION |
| |
| #include "unicode/putil.h" |
| #include "cmemory.h" |
| #include "cstring.h" |
| #include "ufile.h" |
| #include "ufmt_cmn.h" |
| #include "unicode/ucnv.h" |
| #include "unicode/ustring.h" |
| |
| #include <string.h> |
| |
| #define DELIM_LF 0x000A |
| #define DELIM_VT 0x000B |
| #define DELIM_FF 0x000C |
| #define DELIM_CR 0x000D |
| #define DELIM_NEL 0x0085 |
| #define DELIM_LS 0x2028 |
| #define DELIM_PS 0x2029 |
| |
| /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */ |
| #if U_PLATFORM_USES_ONLY_WIN32_API |
| static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 }; |
| static const uint32_t DELIMITERS_LEN = 2; |
| /* TODO: Default newline writing should be detected based upon the converter being used. */ |
| #else |
| static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 }; |
| static const uint32_t DELIMITERS_LEN = 1; |
| #endif |
| |
| #define IS_FIRST_STRING_DELIMITER(c1) \ |
| (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \ |
| || (c1) == DELIM_NEL \ |
| || (c1) == DELIM_LS \ |
| || (c1) == DELIM_PS) |
| #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR) |
| #define IS_COMBINED_STRING_DELIMITER(c1, c2) \ |
| (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF) |
| |
| |
| #if !UCONFIG_NO_TRANSLITERATION |
| |
| U_CAPI UTransliterator* U_EXPORT2 |
| u_fsettransliterator(UFILE *file, UFileDirection direction, |
| UTransliterator *adopt, UErrorCode *status) |
| { |
| UTransliterator *old = NULL; |
| |
| if(U_FAILURE(*status)) |
| { |
| return adopt; |
| } |
| |
| if(!file) |
| { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| return adopt; |
| } |
| |
| if(direction & U_READ) |
| { |
| /** TODO: implement */ |
| *status = U_UNSUPPORTED_ERROR; |
| return adopt; |
| } |
| |
| if(adopt == NULL) /* they are clearing it */ |
| { |
| if(file->fTranslit != NULL) |
| { |
| /* TODO: Check side */ |
| old = file->fTranslit->translit; |
| uprv_free(file->fTranslit->buffer); |
| file->fTranslit->buffer=NULL; |
| uprv_free(file->fTranslit); |
| file->fTranslit=NULL; |
| } |
| } |
| else |
| { |
| if(file->fTranslit == NULL) |
| { |
| file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer)); |
| if(!file->fTranslit) |
| { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| return adopt; |
| } |
| file->fTranslit->capacity = 0; |
| file->fTranslit->length = 0; |
| file->fTranslit->pos = 0; |
| file->fTranslit->buffer = NULL; |
| } |
| else |
| { |
| old = file->fTranslit->translit; |
| ufile_flush_translit(file); |
| } |
| |
| file->fTranslit->translit = adopt; |
| } |
| |
| return old; |
| } |
| |
| static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush) |
| { |
| int32_t newlen; |
| int32_t junkCount = 0; |
| int32_t textLength; |
| int32_t textLimit; |
| UTransPosition pos; |
| UErrorCode status = U_ZERO_ERROR; |
| |
| if(count == NULL) |
| { |
| count = &junkCount; |
| } |
| |
| if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit)) |
| { |
| /* fast path */ |
| return src; |
| } |
| |
| /* First: slide over everything */ |
| if(f->fTranslit->length > f->fTranslit->pos) |
| { |
| memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos, |
| (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar)); |
| } |
| f->fTranslit->length -= f->fTranslit->pos; /* always */ |
| f->fTranslit->pos = 0; |
| |
| /* Calculate new buffer size needed */ |
| newlen = (*count + f->fTranslit->length) * 4; |
| |
| if(newlen > f->fTranslit->capacity) |
| { |
| if(f->fTranslit->buffer == NULL) |
| { |
| f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar)); |
| } |
| else |
| { |
| f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar)); |
| } |
| /* Check for malloc/realloc failure. */ |
| if (f->fTranslit->buffer == NULL) { |
| return NULL; |
| } |
| f->fTranslit->capacity = newlen; |
| } |
| |
| /* Now, copy any data over */ |
| u_strncpy(f->fTranslit->buffer + f->fTranslit->length, |
| src, |
| *count); |
| f->fTranslit->length += *count; |
| |
| /* Now, translit in place as much as we can */ |
| if(flush == FALSE) |
| { |
| textLength = f->fTranslit->length; |
| pos.contextStart = 0; |
| pos.contextLimit = textLength; |
| pos.start = 0; |
| pos.limit = textLength; |
| |
| utrans_transIncrementalUChars(f->fTranslit->translit, |
| f->fTranslit->buffer, /* because we shifted */ |
| &textLength, |
| f->fTranslit->capacity, |
| &pos, |
| &status); |
| |
| /* now: start/limit point to the transliterated text */ |
| /* Transliterated is [buffer..pos.start) */ |
| *count = pos.start; |
| f->fTranslit->pos = pos.start; |
| f->fTranslit->length = pos.limit; |
| |
| return f->fTranslit->buffer; |
| } |
| else |
| { |
| textLength = f->fTranslit->length; |
| textLimit = f->fTranslit->length; |
| |
| utrans_transUChars(f->fTranslit->translit, |
| f->fTranslit->buffer, |
| &textLength, |
| f->fTranslit->capacity, |
| 0, |
| &textLimit, |
| &status); |
| |
| /* out: converted len */ |
| *count = textLimit; |
| |
| /* Set pointers to 0 */ |
| f->fTranslit->pos = 0; |
| f->fTranslit->length = 0; |
| |
| return f->fTranslit->buffer; |
| } |
| } |
| |
| #endif |
| |
| void |
| ufile_flush_translit(UFILE *f) |
| { |
| #if !UCONFIG_NO_TRANSLITERATION |
| if((!f)||(!f->fTranslit)) |
| return; |
| #endif |
| |
| u_file_write_flush(NULL, 0, f, FALSE, TRUE); |
| } |
| |
| |
| void |
| ufile_flush_io(UFILE *f) |
| { |
| if((!f) || (!f->fFile)) { |
| return; /* skip if no file */ |
| } |
| |
| u_file_write_flush(NULL, 0, f, TRUE, FALSE); |
| } |
| |
| |
| void |
| ufile_close_translit(UFILE *f) |
| { |
| #if !UCONFIG_NO_TRANSLITERATION |
| if((!f)||(!f->fTranslit)) |
| return; |
| #endif |
| |
| ufile_flush_translit(f); |
| |
| #if !UCONFIG_NO_TRANSLITERATION |
| if(f->fTranslit->translit) |
| utrans_close(f->fTranslit->translit); |
| |
| if(f->fTranslit->buffer) |
| { |
| uprv_free(f->fTranslit->buffer); |
| } |
| |
| uprv_free(f->fTranslit); |
| f->fTranslit = NULL; |
| #endif |
| } |
| |
| |
| /* Input/output */ |
| |
| U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_fputs(const UChar *s, |
| UFILE *f) |
| { |
| int32_t count = u_file_write(s, u_strlen(s), f); |
| count += u_file_write(DELIMITERS, DELIMITERS_LEN, f); |
| return count; |
| } |
| |
| U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_fputc(UChar32 uc, |
| UFILE *f) |
| { |
| UChar buf[2]; |
| int32_t idx = 0; |
| UBool isError = FALSE; |
| |
| U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError); |
| if (isError) { |
| return U_EOF; |
| } |
| return u_file_write(buf, idx, f) == idx ? uc : U_EOF; |
| } |
| |
| |
| U_CFUNC int32_t U_EXPORT2 |
| u_file_write_flush(const UChar *chars, |
| int32_t count, |
| UFILE *f, |
| UBool flushIO, |
| UBool flushTranslit) |
| { |
| /* Set up conversion parameters */ |
| UErrorCode status = U_ZERO_ERROR; |
| const UChar *mySource = chars; |
| const UChar *mySourceBegin; |
| const UChar *mySourceEnd; |
| char charBuffer[UFILE_CHARBUFFER_SIZE]; |
| char *myTarget = charBuffer; |
| int32_t written = 0; |
| int32_t numConverted = 0; |
| |
| if (count < 0) { |
| count = u_strlen(chars); |
| } |
| |
| #if !UCONFIG_NO_TRANSLITERATION |
| if((f->fTranslit) && (f->fTranslit->translit)) |
| { |
| /* Do the transliteration */ |
| mySource = u_file_translit(f, chars, &count, flushTranslit); |
| } |
| #endif |
| |
| /* Write to a string. */ |
| if (!f->fFile) { |
| int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos); |
| if (flushIO && charsLeft > count) { |
| count++; |
| } |
| written = ufmt_min(count, charsLeft); |
| u_strncpy(f->str.fPos, mySource, written); |
| f->str.fPos += written; |
| return written; |
| } |
| |
| mySourceEnd = mySource + count; |
| |
| /* Perform the conversion in a loop */ |
| do { |
| mySourceBegin = mySource; /* beginning location for this loop */ |
| status = U_ZERO_ERROR; |
| if(f->fConverter != NULL) { /* We have a valid converter */ |
| ucnv_fromUnicode(f->fConverter, |
| &myTarget, |
| charBuffer + UFILE_CHARBUFFER_SIZE, |
| &mySource, |
| mySourceEnd, |
| NULL, |
| flushIO, |
| &status); |
| } else { /*weiv: do the invariant conversion */ |
| int32_t convertChars = (int32_t) (mySourceEnd - mySource); |
| if (convertChars > UFILE_CHARBUFFER_SIZE) { |
| convertChars = UFILE_CHARBUFFER_SIZE; |
| status = U_BUFFER_OVERFLOW_ERROR; |
| } |
| u_UCharsToChars(mySource, myTarget, convertChars); |
| mySource += convertChars; |
| myTarget += convertChars; |
| } |
| numConverted = (int32_t)(myTarget - charBuffer); |
| |
| if (numConverted > 0) { |
| /* write the converted bytes */ |
| fwrite(charBuffer, |
| sizeof(char), |
| numConverted, |
| f->fFile); |
| |
| written += (int32_t) (mySource - mySourceBegin); |
| } |
| myTarget = charBuffer; |
| } |
| while(status == U_BUFFER_OVERFLOW_ERROR); |
| |
| /* return # of chars written */ |
| return written; |
| } |
| |
| U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_file_write( const UChar *chars, |
| int32_t count, |
| UFILE *f) |
| { |
| return u_file_write_flush(chars,count,f,FALSE,FALSE); |
| } |
| |
| |
| /* private function used for buffering input */ |
| void |
| ufile_fill_uchar_buffer(UFILE *f) |
| { |
| UErrorCode status; |
| const char *mySource; |
| const char *mySourceEnd; |
| UChar *myTarget; |
| int32_t bufferSize; |
| int32_t maxCPBytes; |
| int32_t bytesRead; |
| int32_t availLength; |
| int32_t dataSize; |
| char charBuffer[UFILE_CHARBUFFER_SIZE]; |
| u_localized_string *str; |
| |
| if (f->fFile == NULL) { |
| /* There is nothing to do. It's a string. */ |
| return; |
| } |
| |
| str = &f->str; |
| dataSize = (int32_t)(str->fLimit - str->fPos); |
| if (f->fFileno == 0 && dataSize > 0) { |
| /* Don't read from stdin too many times. There is still some data. */ |
| return; |
| } |
| |
| /* shift the buffer if it isn't empty */ |
| if(dataSize != 0) { |
| uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar)); /* not accessing beyond memory */ |
| } |
| |
| |
| /* record how much buffer space is available */ |
| availLength = UFILE_UCHARBUFFER_SIZE - dataSize; |
| |
| /* Determine the # of codepage bytes needed to fill our UChar buffer */ |
| /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/ |
| maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1); |
| |
| /* Read in the data to convert */ |
| if (f->fFileno == 0) { |
| /* Special case. Read from stdin one line at a time. */ |
| char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile); |
| bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0); |
| } |
| else { |
| /* A normal file */ |
| bytesRead = (int32_t)fread(charBuffer, |
| sizeof(char), |
| ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), |
| f->fFile); |
| } |
| |
| /* Set up conversion parameters */ |
| status = U_ZERO_ERROR; |
| mySource = charBuffer; |
| mySourceEnd = charBuffer + bytesRead; |
| myTarget = f->fUCBuffer + dataSize; |
| bufferSize = UFILE_UCHARBUFFER_SIZE; |
| |
| if(f->fConverter != NULL) { /* We have a valid converter */ |
| /* Perform the conversion */ |
| ucnv_toUnicode(f->fConverter, |
| &myTarget, |
| f->fUCBuffer + bufferSize, |
| &mySource, |
| mySourceEnd, |
| NULL, |
| (UBool)(feof(f->fFile) != 0), |
| &status); |
| |
| } else { /*weiv: do the invariant conversion */ |
| u_charsToUChars(mySource, myTarget, bytesRead); |
| myTarget += bytesRead; |
| } |
| |
| /* update the pointers into our array */ |
| str->fPos = str->fBuffer; |
| str->fLimit = myTarget; |
| } |
| |
| U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_fgets(UChar *s, |
| int32_t n, |
| UFILE *f) |
| { |
| int32_t dataSize; |
| int32_t count; |
| UChar *alias; |
| const UChar *limit; |
| UChar *sItr; |
| UChar currDelim = 0; |
| u_localized_string *str; |
| |
| if (n <= 0) { |
| /* Caller screwed up. We need to write the null terminatior. */ |
| return NULL; |
| } |
| |
| /* fill the buffer if needed */ |
| str = &f->str; |
| if (str->fPos >= str->fLimit) { |
| ufile_fill_uchar_buffer(f); |
| } |
| |
| /* subtract 1 from n to compensate for the terminator */ |
| --n; |
| |
| /* determine the amount of data in the buffer */ |
| dataSize = (int32_t)(str->fLimit - str->fPos); |
| |
| /* if 0 characters were left, return 0 */ |
| if (dataSize == 0) |
| return NULL; |
| |
| /* otherwise, iteratively fill the buffer and copy */ |
| count = 0; |
| sItr = s; |
| currDelim = 0; |
| while (dataSize > 0 && count < n) { |
| alias = str->fPos; |
| |
| /* Find how much to copy */ |
| if (dataSize < (n - count)) { |
| limit = str->fLimit; |
| } |
| else { |
| limit = alias + (n - count); |
| } |
| |
| if (!currDelim) { |
| /* Copy UChars until we find the first occurrence of a delimiter character */ |
| while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) { |
| count++; |
| *(sItr++) = *(alias++); |
| } |
| /* Preserve the newline */ |
| if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) { |
| if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) { |
| currDelim = *alias; |
| } |
| else { |
| currDelim = 1; /* This isn't a newline, but it's used to say |
| that we should break later. We've checked all |
| possible newline combinations even across buffer |
| boundaries. */ |
| } |
| count++; |
| *(sItr++) = *(alias++); |
| } |
| } |
| /* If we have a CRLF combination, preserve that too. */ |
| if (alias < limit) { |
| if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) { |
| count++; |
| *(sItr++) = *(alias++); |
| } |
| currDelim = 1; /* This isn't a newline, but it's used to say |
| that we should break later. We've checked all |
| possible newline combinations even across buffer |
| boundaries. */ |
| } |
| |
| /* update the current buffer position */ |
| str->fPos = alias; |
| |
| /* if we found a delimiter */ |
| if (currDelim == 1) { |
| /* break out */ |
| break; |
| } |
| |
| /* refill the buffer */ |
| ufile_fill_uchar_buffer(f); |
| |
| /* determine the amount of data in the buffer */ |
| dataSize = (int32_t)(str->fLimit - str->fPos); |
| } |
| |
| /* add the terminator and return s */ |
| *sItr = 0x0000; |
| return s; |
| } |
| |
| U_CFUNC UBool U_EXPORT2 |
| ufile_getch(UFILE *f, UChar *ch) |
| { |
| UBool isValidChar = FALSE; |
| |
| *ch = U_EOF; |
| /* if we have an available character in the buffer, return it */ |
| if(f->str.fPos < f->str.fLimit){ |
| *ch = *(f->str.fPos)++; |
| isValidChar = TRUE; |
| } |
| else { |
| /* otherwise, fill the buffer and return the next character */ |
| if(f->str.fPos >= f->str.fLimit) { |
| ufile_fill_uchar_buffer(f); |
| } |
| if(f->str.fPos < f->str.fLimit) { |
| *ch = *(f->str.fPos)++; |
| isValidChar = TRUE; |
| } |
| } |
| return isValidChar; |
| } |
| |
| U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_fgetc(UFILE *f) |
| { |
| UChar ch; |
| ufile_getch(f, &ch); |
| return ch; |
| } |
| |
| U_CFUNC UBool U_EXPORT2 |
| ufile_getch32(UFILE *f, UChar32 *c32) |
| { |
| UBool isValidChar = FALSE; |
| u_localized_string *str; |
| |
| *c32 = U_EOF; |
| |
| /* Fill the buffer if it is empty */ |
| str = &f->str; |
| if (f && str->fPos + 1 >= str->fLimit) { |
| ufile_fill_uchar_buffer(f); |
| } |
| |
| /* Get the next character in the buffer */ |
| if (str->fPos < str->fLimit) { |
| *c32 = *(str->fPos)++; |
| if (U_IS_LEAD(*c32)) { |
| if (str->fPos < str->fLimit) { |
| UChar c16 = *(str->fPos)++; |
| *c32 = U16_GET_SUPPLEMENTARY(*c32, c16); |
| isValidChar = TRUE; |
| } |
| else { |
| *c32 = U_EOF; |
| } |
| } |
| else { |
| isValidChar = TRUE; |
| } |
| } |
| |
| return isValidChar; |
| } |
| |
| U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_fgetcx(UFILE *f) |
| { |
| UChar32 ch; |
| ufile_getch32(f, &ch); |
| return ch; |
| } |
| |
| U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_fungetc(UChar32 ch, |
| UFILE *f) |
| { |
| u_localized_string *str; |
| |
| str = &f->str; |
| |
| /* if we're at the beginning of the buffer, sorry! */ |
| if (str->fPos == str->fBuffer |
| || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer)) |
| { |
| ch = U_EOF; |
| } |
| else { |
| /* otherwise, put the character back */ |
| /* Remember, read them back on in the reverse order. */ |
| if (U_IS_LEAD(ch)) { |
| if (*--(str->fPos) != U16_TRAIL(ch) |
| || *--(str->fPos) != U16_LEAD(ch)) |
| { |
| ch = U_EOF; |
| } |
| } |
| else if (*--(str->fPos) != ch) { |
| ch = U_EOF; |
| } |
| } |
| return ch; |
| } |
| |
| U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ |
| u_file_read( UChar *chars, |
| int32_t count, |
| UFILE *f) |
| { |
| int32_t dataSize; |
| int32_t read = 0; |
| u_localized_string *str = &f->str; |
| |
| do { |
| |
| /* determine the amount of data in the buffer */ |
| dataSize = (int32_t)(str->fLimit - str->fPos); |
| if (dataSize <= 0) { |
| /* fill the buffer */ |
| ufile_fill_uchar_buffer(f); |
| dataSize = (int32_t)(str->fLimit - str->fPos); |
| } |
| |
| /* Make sure that we don't read too much */ |
| if (dataSize > (count - read)) { |
| dataSize = count - read; |
| } |
| |
| /* copy the current data in the buffer */ |
| memcpy(chars + read, str->fPos, dataSize * sizeof(UChar)); |
| |
| /* update number of items read */ |
| read += dataSize; |
| |
| /* update the current buffer position */ |
| str->fPos += dataSize; |
| } |
| while (dataSize != 0 && read < count); |
| |
| return read; |
| } |
| #endif |