| /* |
| ******************************************************************************* |
| * |
| * Copyright (C) 1998-2014, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| ******************************************************************************* |
| * |
| * File uscanf_p.c |
| * |
| * Modification History: |
| * |
| * Date Name Description |
| * 12/02/98 stephen Creation. |
| * 03/13/99 stephen Modified for new C API. |
| ******************************************************************************* |
| */ |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION |
| |
| #include "unicode/uchar.h" |
| #include "unicode/ustring.h" |
| #include "unicode/unum.h" |
| #include "unicode/udat.h" |
| #include "unicode/uset.h" |
| #include "uscanf.h" |
| #include "ufmt_cmn.h" |
| #include "ufile.h" |
| #include "locbund.h" |
| |
| #include "cmemory.h" |
| #include "ustr_cnv.h" |
| |
/* flag characters for u_scanf */
#define FLAG_ASTERISK   0x002A
#define FLAG_PAREN      0x0028

/*
 * Non-zero if s is one of the u_scanf flag characters ('*' or '(').
 * The whole expansion is parenthesized so the macro can be combined with
 * other operators (!, &&, ||) without changing its meaning.
 * Note: s is evaluated more than once; do not pass an expression with
 * side effects.
 */
#define ISFLAG(s)       ((s) == FLAG_ASTERISK || \
                         (s) == FLAG_PAREN)

/* special characters for u_scanf */
#define SPEC_DOLLARSIGN 0x0024

/* unicode digits */
#define DIGIT_ZERO      0x0030
#define DIGIT_ONE       0x0031
#define DIGIT_TWO       0x0032
#define DIGIT_THREE     0x0033
#define DIGIT_FOUR      0x0034
#define DIGIT_FIVE      0x0035
#define DIGIT_SIX       0x0036
#define DIGIT_SEVEN     0x0037
#define DIGIT_EIGHT     0x0038
#define DIGIT_NINE      0x0039

/*
 * Non-zero if s is one of the ASCII digits U+0030..U+0039.
 * The ten digit constants above are contiguous, so a range test is
 * equivalent to comparing against each of them in turn.
 * Note: s is evaluated twice; do not pass an expression with side effects.
 */
#define ISDIGIT(s)      ((s) >= DIGIT_ZERO && (s) <= DIGIT_NINE)

/* u_scanf modifiers */
#define MOD_H           0x0068
#define MOD_LOWERL      0x006C
#define MOD_L           0x004C

/*
 * Non-zero if s is one of the u_scanf length modifiers ('h', 'l' or 'L').
 * Fully parenthesized for the same reason as ISFLAG.
 */
#define ISMOD(s)        ((s) == MOD_H || \
                         (s) == MOD_LOWERL || \
                         (s) == MOD_L)
| |
| /** |
| * Struct encapsulating a single uscanf format specification. |
| */ |
| typedef struct u_scanf_spec_info { |
| int32_t fWidth; /* Width */ |
| |
| UChar fSpec; /* Format specification */ |
| |
| UChar fPadChar; /* Padding character */ |
| |
| UBool fSkipArg; /* TRUE if arg should be skipped */ |
| UBool fIsLongDouble; /* L flag */ |
| UBool fIsShort; /* h flag */ |
| UBool fIsLong; /* l flag */ |
| UBool fIsLongLong; /* ll flag */ |
| UBool fIsString; /* TRUE if this is a NULL-terminated string. */ |
| } u_scanf_spec_info; |
| |
| |
| /** |
| * Struct encapsulating a single u_scanf format specification. |
| */ |
| typedef struct u_scanf_spec { |
| u_scanf_spec_info fInfo; /* Information on this spec */ |
| int32_t fArgPos; /* Position of data in arg list */ |
| } u_scanf_spec; |
| |
| /** |
| * Parse a single u_scanf format specifier in Unicode. |
| * @param fmt A pointer to a '%' character in a u_scanf format specification. |
| * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed |
| * format specifier. |
| * @return The number of characters contained in this specifier. |
| */ |
| static int32_t |
| u_scanf_parse_spec (const UChar *fmt, |
| u_scanf_spec *spec) |
| { |
| const UChar *s = fmt; |
| const UChar *backup; |
| u_scanf_spec_info *info = &(spec->fInfo); |
| |
| /* initialize spec to default values */ |
| spec->fArgPos = -1; |
| |
| info->fWidth = -1; |
| info->fSpec = 0x0000; |
| info->fPadChar = 0x0020; |
| info->fSkipArg = FALSE; |
| info->fIsLongDouble = FALSE; |
| info->fIsShort = FALSE; |
| info->fIsLong = FALSE; |
| info->fIsLongLong = FALSE; |
| info->fIsString = TRUE; |
| |
| |
| /* skip over the initial '%' */ |
| s++; |
| |
| /* Check for positional argument */ |
| if(ISDIGIT(*s)) { |
| |
| /* Save the current position */ |
| backup = s; |
| |
| /* handle positional parameters */ |
| if(ISDIGIT(*s)) { |
| spec->fArgPos = (int) (*s++ - DIGIT_ZERO); |
| |
| while(ISDIGIT(*s)) { |
| spec->fArgPos *= 10; |
| spec->fArgPos += (int) (*s++ - DIGIT_ZERO); |
| } |
| } |
| |
| /* if there is no '$', don't read anything */ |
| if(*s != SPEC_DOLLARSIGN) { |
| spec->fArgPos = -1; |
| s = backup; |
| } |
| /* munge the '$' */ |
| else |
| s++; |
| } |
| |
| /* Get any format flags */ |
| while(ISFLAG(*s)) { |
| switch(*s++) { |
| |
| /* skip argument */ |
| case FLAG_ASTERISK: |
| info->fSkipArg = TRUE; |
| break; |
| |
| /* pad character specified */ |
| case FLAG_PAREN: |
| |
| /* first four characters are hex values for pad char */ |
| info->fPadChar = (UChar)ufmt_digitvalue(*s++); |
| info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); |
| info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); |
| info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); |
| |
| /* final character is ignored */ |
| s++; |
| |
| break; |
| } |
| } |
| |
| /* Get the width */ |
| if(ISDIGIT(*s)){ |
| info->fWidth = (int) (*s++ - DIGIT_ZERO); |
| |
| while(ISDIGIT(*s)) { |
| info->fWidth *= 10; |
| info->fWidth += (int) (*s++ - DIGIT_ZERO); |
| } |
| } |
| |
| /* Get any modifiers */ |
| if(ISMOD(*s)) { |
| switch(*s++) { |
| |
| /* short */ |
| case MOD_H: |
| info->fIsShort = TRUE; |
| break; |
| |
| /* long or long long */ |
| case MOD_LOWERL: |
| if(*s == MOD_LOWERL) { |
| info->fIsLongLong = TRUE; |
| /* skip over the next 'l' */ |
| s++; |
| } |
| else |
| info->fIsLong = TRUE; |
| break; |
| |
| /* long double */ |
| case MOD_L: |
| info->fIsLongDouble = TRUE; |
| break; |
| } |
| } |
| |
| /* finally, get the specifier letter */ |
| info->fSpec = *s++; |
| |
| /* return # of characters in this specifier */ |
| return (int32_t)(s - fmt); |
| } |
| |
/* '%': the character that introduces a conversion specification */
#define UP_PERCENT 0x0025


/*
 * Initializers for the entries of the conversion table. Each one expands to
 * a { ufmt type tag, handler function } pair.
 * Use US-ASCII characters only for formatting.
 */

/* ---- ANSI style formatting ---- */

#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}  /* %    */
#define UFMT_STRING         {ufmt_string, u_scanf_string_handler}                  /* s    */
#define UFMT_CHAR           {ufmt_string, u_scanf_char_handler}                    /* c    */
#define UFMT_INT            {ufmt_int, u_scanf_integer_handler}                    /* d, i */
#define UFMT_UINT           {ufmt_int, u_scanf_uinteger_handler}                   /* u    */
#define UFMT_OCTAL          {ufmt_int, u_scanf_octal_handler}                      /* o    */
#define UFMT_HEX            {ufmt_int, u_scanf_hex_handler}                        /* x, X */
#define UFMT_DOUBLE         {ufmt_double, u_scanf_double_handler}                  /* f    */
#define UFMT_SCIENTIFIC     {ufmt_double, u_scanf_scientific_handler}              /* e, E */
#define UFMT_SCIDBL         {ufmt_double, u_scanf_scidbl_handler}                  /* g, G */
#define UFMT_COUNT          {ufmt_count, u_scanf_count_handler}                    /* n    */
#define UFMT_SCANSET        {ufmt_string, u_scanf_scanset_handler}                 /* [    */

/* ---- non-ANSI extensions ---- */

#define UFMT_POINTER        {ufmt_pointer, u_scanf_pointer_handler}                /* p    */
#define UFMT_SPELLOUT       {ufmt_double, u_scanf_spellout_handler}                /* V    */
#define UFMT_PERCENT        {ufmt_double, u_scanf_percent_handler}                 /* P    */
#define UFMT_UCHAR          {ufmt_uchar, u_scanf_uchar_handler}                    /* C; K is old format */
#define UFMT_USTRING        {ufmt_ustring, u_scanf_ustring_handler}                /* S; U is old format */


/* Table slot for a character that has no conversion handler */
#define UFMT_EMPTY          {ufmt_empty, NULL}
| |
| /** |
| * A u_scanf handler function. |
| * A u_scanf handler is responsible for handling a single u_scanf |
| * format specification, for example 'd' or 's'. |
| * @param stream The UFILE to which to write output. |
| * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing |
| * information on the format specification. |
| * @param args A pointer to the argument data |
| * @param fmt A pointer to the first character in the format string |
| * following the spec. |
| * @param fmtConsumed On output, set to the number of characters consumed |
| * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width. |
| * @param argConverted The number of arguments converted and assigned, or -1 if an |
| * error occurred. |
| * @return The number of code points consumed during reading. |
| */ |
| typedef int32_t (*u_scanf_handler) (UFILE *stream, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted); |
| |
| typedef struct u_scanf_info { |
| ufmt_type_info info; |
| u_scanf_handler handler; |
| } u_scanf_info; |
| |
/* Number of entries in the conversion handler table */
#define USCANF_NUM_FMT_HANDLERS     108

/* Capacity, in UChars, of the scratch buffers used for locale symbols */
#define USCANF_SYMBOL_BUFFER_SIZE   8

/* The handler table starts at U+0020: we do not use handlers for 0-0x1f */
#define USCANF_BASE_FMT_HANDLERS    0x20
| |
| |
| static int32_t |
| u_scanf_skip_leading_ws(UFILE *input, |
| UChar pad) |
| { |
| UChar c; |
| int32_t count = 0; |
| UBool isNotEOF; |
| |
| /* skip all leading ws in the input */ |
| while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) ) |
| { |
| count++; |
| } |
| |
| /* put the final character back on the input */ |
| if(isNotEOF) |
| u_fungetc(c, input); |
| |
| return count; |
| } |
| |
| /* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */ |
| static int32_t |
| u_scanf_skip_leading_positive_sign(UFILE *input, |
| UNumberFormat *format, |
| UErrorCode *status) |
| { |
| UChar c; |
| int32_t count = 0; |
| UBool isNotEOF; |
| UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE]; |
| int32_t symbolLen; |
| UErrorCode localStatus = U_ZERO_ERROR; |
| |
| if (U_SUCCESS(*status)) { |
| symbolLen = unum_getSymbol(format, |
| UNUM_PLUS_SIGN_SYMBOL, |
| plusSymbol, |
| sizeof(plusSymbol)/sizeof(*plusSymbol), |
| &localStatus); |
| |
| if (U_SUCCESS(localStatus)) { |
| /* skip all leading ws in the input */ |
| while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) ) |
| { |
| count++; |
| } |
| |
| /* put the final character back on the input */ |
| if(isNotEOF) { |
| u_fungetc(c, input); |
| } |
| } |
| } |
| |
| return count; |
| } |
| |
| static int32_t |
| u_scanf_simple_percent_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| /* make sure the next character in the input is a percent */ |
| *argConverted = 0; |
| if(u_fgetc(input) != 0x0025) { |
| *argConverted = -1; |
| } |
| return 1; |
| } |
| |
| static int32_t |
| u_scanf_count_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| /* in the special case of count, the u_scanf_spec_info's width */ |
| /* will contain the # of items converted thus far */ |
| if (!info->fSkipArg) { |
| if (info->fIsShort) |
| *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth); |
| else if (info->fIsLongLong) |
| *(int64_t*)(args[0].ptrValue) = info->fWidth; |
| else |
| *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth); |
| } |
| *argConverted = 0; |
| |
| /* we converted 0 args */ |
| return 0; |
| } |
| |
| static int32_t |
| u_scanf_double_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| int32_t len; |
| double num; |
| UNumberFormat *format; |
| int32_t parsePos = 0; |
| int32_t skipped; |
| UErrorCode status = U_ZERO_ERROR; |
| |
| |
| /* skip all ws in the input */ |
| skipped = u_scanf_skip_leading_ws(input, info->fPadChar); |
| |
| /* fill the input's internal buffer */ |
| ufile_fill_uchar_buffer(input); |
| |
| /* determine the size of the input's buffer */ |
| len = (int32_t)(input->str.fLimit - input->str.fPos); |
| |
| /* truncate to the width, if specified */ |
| if(info->fWidth != -1) |
| len = ufmt_min(len, info->fWidth); |
| |
| /* get the formatter */ |
| format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); |
| |
| /* handle error */ |
| if(format == 0) |
| return 0; |
| |
| /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ |
| skipped += u_scanf_skip_leading_positive_sign(input, format, &status); |
| |
| /* parse the number */ |
| num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); |
| |
| if (!info->fSkipArg) { |
| if (info->fIsLong) |
| *(double*)(args[0].ptrValue) = num; |
| else if (info->fIsLongDouble) |
| *(long double*)(args[0].ptrValue) = num; |
| else |
| *(float*)(args[0].ptrValue) = (float)num; |
| } |
| |
| /* mask off any necessary bits */ |
| /* if(! info->fIsLong_double) |
| num &= DBL_MAX;*/ |
| |
| /* update the input's position to reflect consumed data */ |
| input->str.fPos += parsePos; |
| |
| /* we converted 1 arg */ |
| *argConverted = !info->fSkipArg; |
| return parsePos + skipped; |
| } |
| |
| #define UPRINTF_SYMBOL_BUFFER_SIZE 8 |
| |
| static int32_t |
| u_scanf_scientific_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| int32_t len; |
| double num; |
| UNumberFormat *format; |
| int32_t parsePos = 0; |
| int32_t skipped; |
| UErrorCode status = U_ZERO_ERROR; |
| UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; |
| int32_t srcLen, expLen; |
| UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; |
| |
| |
| /* skip all ws in the input */ |
| skipped = u_scanf_skip_leading_ws(input, info->fPadChar); |
| |
| /* fill the input's internal buffer */ |
| ufile_fill_uchar_buffer(input); |
| |
| /* determine the size of the input's buffer */ |
| len = (int32_t)(input->str.fLimit - input->str.fPos); |
| |
| /* truncate to the width, if specified */ |
| if(info->fWidth != -1) |
| len = ufmt_min(len, info->fWidth); |
| |
| /* get the formatter */ |
| format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); |
| |
| /* handle error */ |
| if(format == 0) |
| return 0; |
| |
| /* set the appropriate flags on the formatter */ |
| |
| srcLen = unum_getSymbol(format, |
| UNUM_EXPONENTIAL_SYMBOL, |
| srcExpBuf, |
| sizeof(srcExpBuf), |
| &status); |
| |
| /* Upper/lower case the e */ |
| if (info->fSpec == (UChar)0x65 /* e */) { |
| expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf), |
| srcExpBuf, srcLen, |
| input->str.fBundle.fLocale, |
| &status); |
| } |
| else { |
| expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf), |
| srcExpBuf, srcLen, |
| input->str.fBundle.fLocale, |
| &status); |
| } |
| |
| unum_setSymbol(format, |
| UNUM_EXPONENTIAL_SYMBOL, |
| expBuf, |
| expLen, |
| &status); |
| |
| |
| |
| |
| /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ |
| skipped += u_scanf_skip_leading_positive_sign(input, format, &status); |
| |
| /* parse the number */ |
| num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); |
| |
| if (!info->fSkipArg) { |
| if (info->fIsLong) |
| *(double*)(args[0].ptrValue) = num; |
| else if (info->fIsLongDouble) |
| *(long double*)(args[0].ptrValue) = num; |
| else |
| *(float*)(args[0].ptrValue) = (float)num; |
| } |
| |
| /* mask off any necessary bits */ |
| /* if(! info->fIsLong_double) |
| num &= DBL_MAX;*/ |
| |
| /* update the input's position to reflect consumed data */ |
| input->str.fPos += parsePos; |
| |
| /* we converted 1 arg */ |
| *argConverted = !info->fSkipArg; |
| return parsePos + skipped; |
| } |
| |
| static int32_t |
| u_scanf_scidbl_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| int32_t len; |
| double num; |
| UNumberFormat *scientificFormat, *genericFormat; |
| /*int32_t scientificResult, genericResult;*/ |
| double scientificResult, genericResult; |
| int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0; |
| int32_t skipped; |
| UErrorCode scientificStatus = U_ZERO_ERROR; |
| UErrorCode genericStatus = U_ZERO_ERROR; |
| |
| |
| /* since we can't determine by scanning the characters whether */ |
| /* a number was formatted in the 'f' or 'g' styles, parse the */ |
| /* string with both formatters, and assume whichever one */ |
| /* parsed the most is the correct formatter to use */ |
| |
| |
| /* skip all ws in the input */ |
| skipped = u_scanf_skip_leading_ws(input, info->fPadChar); |
| |
| /* fill the input's internal buffer */ |
| ufile_fill_uchar_buffer(input); |
| |
| /* determine the size of the input's buffer */ |
| len = (int32_t)(input->str.fLimit - input->str.fPos); |
| |
| /* truncate to the width, if specified */ |
| if(info->fWidth != -1) |
| len = ufmt_min(len, info->fWidth); |
| |
| /* get the formatters */ |
| scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); |
| genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); |
| |
| /* handle error */ |
| if(scientificFormat == 0 || genericFormat == 0) |
| return 0; |
| |
| /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ |
| skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus); |
| |
| /* parse the number using each format*/ |
| |
| scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len, |
| &scientificParsePos, &scientificStatus); |
| |
| genericResult = unum_parseDouble(genericFormat, input->str.fPos, len, |
| &genericParsePos, &genericStatus); |
| |
| /* determine which parse made it farther */ |
| if(scientificParsePos > genericParsePos) { |
| /* stash the result in num */ |
| num = scientificResult; |
| /* update the input's position to reflect consumed data */ |
| parsePos += scientificParsePos; |
| } |
| else { |
| /* stash the result in num */ |
| num = genericResult; |
| /* update the input's position to reflect consumed data */ |
| parsePos += genericParsePos; |
| } |
| input->str.fPos += parsePos; |
| |
| if (!info->fSkipArg) { |
| if (info->fIsLong) |
| *(double*)(args[0].ptrValue) = num; |
| else if (info->fIsLongDouble) |
| *(long double*)(args[0].ptrValue) = num; |
| else |
| *(float*)(args[0].ptrValue) = (float)num; |
| } |
| |
| /* mask off any necessary bits */ |
| /* if(! info->fIsLong_double) |
| num &= DBL_MAX;*/ |
| |
| /* we converted 1 arg */ |
| *argConverted = !info->fSkipArg; |
| return parsePos + skipped; |
| } |
| |
| static int32_t |
| u_scanf_integer_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| int32_t len; |
| void *num = (void*) (args[0].ptrValue); |
| UNumberFormat *format; |
| int32_t parsePos = 0; |
| int32_t skipped; |
| UErrorCode status = U_ZERO_ERROR; |
| int64_t result; |
| |
| |
| /* skip all ws in the input */ |
| skipped = u_scanf_skip_leading_ws(input, info->fPadChar); |
| |
| /* fill the input's internal buffer */ |
| ufile_fill_uchar_buffer(input); |
| |
| /* determine the size of the input's buffer */ |
| len = (int32_t)(input->str.fLimit - input->str.fPos); |
| |
| /* truncate to the width, if specified */ |
| if(info->fWidth != -1) |
| len = ufmt_min(len, info->fWidth); |
| |
| /* get the formatter */ |
| format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); |
| |
| /* handle error */ |
| if(format == 0) |
| return 0; |
| |
| /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ |
| skipped += u_scanf_skip_leading_positive_sign(input, format, &status); |
| |
| /* parse the number */ |
| result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status); |
| |
| /* mask off any necessary bits */ |
| if (!info->fSkipArg) { |
| if (info->fIsShort) |
| *(int16_t*)num = (int16_t)(UINT16_MAX & result); |
| else if (info->fIsLongLong) |
| *(int64_t*)num = result; |
| else |
| *(int32_t*)num = (int32_t)(UINT32_MAX & result); |
| } |
| |
| /* update the input's position to reflect consumed data */ |
| input->str.fPos += parsePos; |
| |
| /* we converted 1 arg */ |
| *argConverted = !info->fSkipArg; |
| return parsePos + skipped; |
| } |
| |
| static int32_t |
| u_scanf_uinteger_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| /* TODO Fix this when Numberformat handles uint64_t */ |
| return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted); |
| } |
| |
| static int32_t |
| u_scanf_percent_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| int32_t len; |
| double num; |
| UNumberFormat *format; |
| int32_t parsePos = 0; |
| UErrorCode status = U_ZERO_ERROR; |
| |
| |
| /* skip all ws in the input */ |
| u_scanf_skip_leading_ws(input, info->fPadChar); |
| |
| /* fill the input's internal buffer */ |
| ufile_fill_uchar_buffer(input); |
| |
| /* determine the size of the input's buffer */ |
| len = (int32_t)(input->str.fLimit - input->str.fPos); |
| |
| /* truncate to the width, if specified */ |
| if(info->fWidth != -1) |
| len = ufmt_min(len, info->fWidth); |
| |
| /* get the formatter */ |
| format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT); |
| |
| /* handle error */ |
| if(format == 0) |
| return 0; |
| |
| /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ |
| u_scanf_skip_leading_positive_sign(input, format, &status); |
| |
| /* parse the number */ |
| num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); |
| |
| if (!info->fSkipArg) { |
| *(double*)(args[0].ptrValue) = num; |
| } |
| |
| /* mask off any necessary bits */ |
| /* if(! info->fIsLong_double) |
| num &= DBL_MAX;*/ |
| |
| /* update the input's position to reflect consumed data */ |
| input->str.fPos += parsePos; |
| |
| /* we converted 1 arg */ |
| *argConverted = !info->fSkipArg; |
| return parsePos; |
| } |
| |
| static int32_t |
| u_scanf_string_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| const UChar *source; |
| UConverter *conv; |
| char *arg = (char*)(args[0].ptrValue); |
| char *alias = arg; |
| char *limit; |
| UErrorCode status = U_ZERO_ERROR; |
| int32_t count; |
| int32_t skipped = 0; |
| UChar c; |
| UBool isNotEOF = FALSE; |
| |
| /* skip all ws in the input */ |
| if (info->fIsString) { |
| skipped = u_scanf_skip_leading_ws(input, info->fPadChar); |
| } |
| |
| /* get the string one character at a time, truncating to the width */ |
| count = 0; |
| |
| /* open the default converter */ |
| conv = u_getDefaultConverter(&status); |
| |
| if(U_FAILURE(status)) |
| return -1; |
| |
| while( (info->fWidth == -1 || count < info->fWidth) |
| && (isNotEOF = ufile_getch(input, &c)) |
| && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) |
| { |
| |
| if (!info->fSkipArg) { |
| /* put the character from the input onto the target */ |
| source = &c; |
| /* Since we do this one character at a time, do it this way. */ |
| if (info->fWidth > 0) { |
| limit = alias + info->fWidth - count; |
| } |
| else { |
| limit = alias + ucnv_getMaxCharSize(conv); |
| } |
| |
| /* convert the character to the default codepage */ |
| ucnv_fromUnicode(conv, &alias, limit, &source, source + 1, |
| NULL, TRUE, &status); |
| |
| if(U_FAILURE(status)) { |
| /* clean up */ |
| u_releaseDefaultConverter(conv); |
| return -1; |
| } |
| } |
| |
| /* increment the count */ |
| ++count; |
| } |
| |
| /* put the final character we read back on the input */ |
| if (!info->fSkipArg) { |
| if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF) |
| u_fungetc(c, input); |
| |
| /* add the terminator */ |
| if (info->fIsString) { |
| *alias = 0x00; |
| } |
| } |
| |
| /* clean up */ |
| u_releaseDefaultConverter(conv); |
| |
| /* we converted 1 arg */ |
| *argConverted = !info->fSkipArg; |
| return count + skipped; |
| } |
| |
| static int32_t |
| u_scanf_char_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| if (info->fWidth < 0) { |
| info->fWidth = 1; |
| } |
| info->fIsString = FALSE; |
| return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted); |
| } |
| |
| static int32_t |
| u_scanf_ustring_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| UChar *arg = (UChar*)(args[0].ptrValue); |
| UChar *alias = arg; |
| int32_t count; |
| int32_t skipped = 0; |
| UChar c; |
| UBool isNotEOF = FALSE; |
| |
| /* skip all ws in the input */ |
| if (info->fIsString) { |
| skipped = u_scanf_skip_leading_ws(input, info->fPadChar); |
| } |
| |
| /* get the string one character at a time, truncating to the width */ |
| count = 0; |
| |
| while( (info->fWidth == -1 || count < info->fWidth) |
| && (isNotEOF = ufile_getch(input, &c)) |
| && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) |
| { |
| |
| /* put the character from the input onto the target */ |
| if (!info->fSkipArg) { |
| *alias++ = c; |
| } |
| |
| /* increment the count */ |
| ++count; |
| } |
| |
| /* put the final character we read back on the input */ |
| if (!info->fSkipArg) { |
| if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) { |
| u_fungetc(c, input); |
| } |
| |
| /* add the terminator */ |
| if (info->fIsString) { |
| *alias = 0x0000; |
| } |
| } |
| |
| /* we converted 1 arg */ |
| *argConverted = !info->fSkipArg; |
| return count + skipped; |
| } |
| |
| static int32_t |
| u_scanf_uchar_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| if (info->fWidth < 0) { |
| info->fWidth = 1; |
| } |
| info->fIsString = FALSE; |
| return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted); |
| } |
| |
| static int32_t |
| u_scanf_spellout_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| int32_t len; |
| double num; |
| UNumberFormat *format; |
| int32_t parsePos = 0; |
| int32_t skipped; |
| UErrorCode status = U_ZERO_ERROR; |
| |
| |
| /* skip all ws in the input */ |
| skipped = u_scanf_skip_leading_ws(input, info->fPadChar); |
| |
| /* fill the input's internal buffer */ |
| ufile_fill_uchar_buffer(input); |
| |
| /* determine the size of the input's buffer */ |
| len = (int32_t)(input->str.fLimit - input->str.fPos); |
| |
| /* truncate to the width, if specified */ |
| if(info->fWidth != -1) |
| len = ufmt_min(len, info->fWidth); |
| |
| /* get the formatter */ |
| format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT); |
| |
| /* handle error */ |
| if(format == 0) |
| return 0; |
| |
| /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ |
| /* This is not applicable to RBNF. */ |
| /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/ |
| |
| /* parse the number */ |
| num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); |
| |
| if (!info->fSkipArg) { |
| *(double*)(args[0].ptrValue) = num; |
| } |
| |
| /* mask off any necessary bits */ |
| /* if(! info->fIsLong_double) |
| num &= DBL_MAX;*/ |
| |
| /* update the input's position to reflect consumed data */ |
| input->str.fPos += parsePos; |
| |
| /* we converted 1 arg */ |
| *argConverted = !info->fSkipArg; |
| return parsePos + skipped; |
| } |
| |
| static int32_t |
| u_scanf_hex_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| int32_t len; |
| int32_t skipped; |
| void *num = (void*) (args[0].ptrValue); |
| int64_t result; |
| |
| /* skip all ws in the input */ |
| skipped = u_scanf_skip_leading_ws(input, info->fPadChar); |
| |
| /* fill the input's internal buffer */ |
| ufile_fill_uchar_buffer(input); |
| |
| /* determine the size of the input's buffer */ |
| len = (int32_t)(input->str.fLimit - input->str.fPos); |
| |
| /* truncate to the width, if specified */ |
| if(info->fWidth != -1) |
| len = ufmt_min(len, info->fWidth); |
| |
| /* check for alternate form */ |
| if( *(input->str.fPos) == 0x0030 && |
| (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) { |
| |
| /* skip the '0' and 'x' or 'X' if present */ |
| input->str.fPos += 2; |
| len -= 2; |
| } |
| |
| /* parse the number */ |
| result = ufmt_uto64(input->str.fPos, &len, 16); |
| |
| /* update the input's position to reflect consumed data */ |
| input->str.fPos += len; |
| |
| /* mask off any necessary bits */ |
| if (!info->fSkipArg) { |
| if (info->fIsShort) |
| *(int16_t*)num = (int16_t)(UINT16_MAX & result); |
| else if (info->fIsLongLong) |
| *(int64_t*)num = result; |
| else |
| *(int32_t*)num = (int32_t)(UINT32_MAX & result); |
| } |
| |
| /* we converted 1 arg */ |
| *argConverted = !info->fSkipArg; |
| return len + skipped; |
| } |
| |
| static int32_t |
| u_scanf_octal_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| int32_t len; |
| int32_t skipped; |
| void *num = (void*) (args[0].ptrValue); |
| int64_t result; |
| |
| /* skip all ws in the input */ |
| skipped = u_scanf_skip_leading_ws(input, info->fPadChar); |
| |
| /* fill the input's internal buffer */ |
| ufile_fill_uchar_buffer(input); |
| |
| /* determine the size of the input's buffer */ |
| len = (int32_t)(input->str.fLimit - input->str.fPos); |
| |
| /* truncate to the width, if specified */ |
| if(info->fWidth != -1) |
| len = ufmt_min(len, info->fWidth); |
| |
| /* parse the number */ |
| result = ufmt_uto64(input->str.fPos, &len, 8); |
| |
| /* update the input's position to reflect consumed data */ |
| input->str.fPos += len; |
| |
| /* mask off any necessary bits */ |
| if (!info->fSkipArg) { |
| if (info->fIsShort) |
| *(int16_t*)num = (int16_t)(UINT16_MAX & result); |
| else if (info->fIsLongLong) |
| *(int64_t*)num = result; |
| else |
| *(int32_t*)num = (int32_t)(UINT32_MAX & result); |
| } |
| |
| /* we converted 1 arg */ |
| *argConverted = !info->fSkipArg; |
| return len + skipped; |
| } |
| |
| static int32_t |
| u_scanf_pointer_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| int32_t len; |
| int32_t skipped; |
| void *result; |
| void **p = (void**)(args[0].ptrValue); |
| |
| |
| /* skip all ws in the input */ |
| skipped = u_scanf_skip_leading_ws(input, info->fPadChar); |
| |
| /* fill the input's internal buffer */ |
| ufile_fill_uchar_buffer(input); |
| |
| /* determine the size of the input's buffer */ |
| len = (int32_t)(input->str.fLimit - input->str.fPos); |
| |
| /* truncate to the width, if specified */ |
| if(info->fWidth != -1) { |
| len = ufmt_min(len, info->fWidth); |
| } |
| |
| /* Make sure that we don't consume too much */ |
| if (len > (int32_t)(sizeof(void*)*2)) { |
| len = (int32_t)(sizeof(void*)*2); |
| } |
| |
| /* parse the pointer - assign to temporary value */ |
| result = ufmt_utop(input->str.fPos, &len); |
| |
| if (!info->fSkipArg) { |
| *p = result; |
| } |
| |
| /* update the input's position to reflect consumed data */ |
| input->str.fPos += len; |
| |
| /* we converted 1 arg */ |
| *argConverted = !info->fSkipArg; |
| return len + skipped; |
| } |
| |
| static int32_t |
| u_scanf_scanset_handler(UFILE *input, |
| u_scanf_spec_info *info, |
| ufmt_args *args, |
| const UChar *fmt, |
| int32_t *fmtConsumed, |
| int32_t *argConverted) |
| { |
| USet *scanset; |
| UErrorCode status = U_ZERO_ERROR; |
| int32_t chLeft = INT32_MAX; |
| UChar32 c; |
| UChar *alias = (UChar*) (args[0].ptrValue); |
| UBool isNotEOF = FALSE; |
| UBool readCharacter = FALSE; |
| |
| /* Create an empty set */ |
| scanset = uset_open(0, -1); |
| |
| /* Back up one to get the [ */ |
| fmt--; |
| |
| /* truncate to the width, if specified and alias the target */ |
| if(info->fWidth >= 0) { |
| chLeft = info->fWidth; |
| } |
| |
| /* parse the scanset from the fmt string */ |
| *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status); |
| |
| /* verify that the parse was successful */ |
| if (U_SUCCESS(status)) { |
| c=0; |
| |
| /* grab characters one at a time and make sure they are in the scanset */ |
| while(chLeft > 0) { |
| if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) { |
| readCharacter = TRUE; |
| if (!info->fSkipArg) { |
| int32_t idx = 0; |
| UBool isError = FALSE; |
| |
| U16_APPEND(alias, idx, chLeft, c, isError); |
| if (isError) { |
| break; |
| } |
| alias += idx; |
| } |
| chLeft -= (1 + U_IS_SUPPLEMENTARY(c)); |
| } |
| else { |
| /* if the character's not in the scanset, break out */ |
| break; |
| } |
| } |
| |
| /* put the final character we read back on the input */ |
| if(isNotEOF && chLeft > 0) { |
| u_fungetc(c, input); |
| } |
| } |
| |
| uset_close(scanset); |
| |
| /* if we didn't match at least 1 character, fail */ |
| if(!readCharacter) |
| return -1; |
| /* otherwise, add the terminator */ |
| else if (!info->fSkipArg) { |
| *alias = 0x00; |
| } |
| |
| /* we converted 1 arg */ |
| *argConverted = !info->fSkipArg; |
| return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft; |
| } |
| |
| /* Use US-ASCII characters only for formatting. Most codepages have |
| characters 20-7F from Unicode. Using any other codepage specific |
| characters will make it very difficult to format the string on |
| non-Unicode machines */ |
| static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = { |
| /* 0x20 */ |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, |
| UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY, |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, |
| |
| /* 0x30 */ |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, |
| |
| /* 0x40 */ |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR, |
| UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL, |
| #ifdef U_USE_OBSOLETE_IO_FORMATTING |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/, |
| #else |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, |
| #endif |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, |
| |
| /* 0x50 */ |
| UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING, |
| #ifdef U_USE_OBSOLETE_IO_FORMATTING |
| UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY, |
| #else |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY, |
| #endif |
| UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET, |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, |
| |
| /* 0x60 */ |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR, |
| UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL, |
| UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY, |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL, |
| |
| /* 0x70 */ |
| UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING, |
| UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY, |
| UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, |
| UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, |
| }; |
| |
| U_CFUNC int32_t |
| u_scanf_parse(UFILE *f, |
| const UChar *patternSpecification, |
| va_list ap) |
| { |
| const UChar *alias; |
| int32_t count, converted, argConsumed, cpConsumed; |
| uint16_t handlerNum; |
| |
| ufmt_args args; |
| u_scanf_spec spec; |
| ufmt_type_info info; |
| u_scanf_handler handler; |
| |
| /* alias the pattern */ |
| alias = patternSpecification; |
| |
| /* haven't converted anything yet */ |
| argConsumed = 0; |
| converted = 0; |
| cpConsumed = 0; |
| |
| /* iterate through the pattern */ |
| for(;;) { |
| |
| /* match any characters up to the next '%' */ |
| while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) { |
| alias++; |
| } |
| |
| /* if we aren't at a '%', or if we're at end of string, break*/ |
| if(*alias != UP_PERCENT || *alias == 0x0000) |
| break; |
| |
| /* parse the specifier */ |
| count = u_scanf_parse_spec(alias, &spec); |
| |
| /* update the pointer in pattern */ |
| alias += count; |
| |
| handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS); |
| if (handlerNum < USCANF_NUM_FMT_HANDLERS) { |
| /* skip the argument, if necessary */ |
| /* query the info function for argument information */ |
| info = g_u_scanf_infos[ handlerNum ].info; |
| if (info != ufmt_count && u_feof(f)) { |
| break; |
| } |
| else if(spec.fInfo.fSkipArg) { |
| args.ptrValue = NULL; |
| } |
| else { |
| switch(info) { |
| case ufmt_count: |
| /* set the spec's width to the # of items converted */ |
| spec.fInfo.fWidth = cpConsumed; |
| /* fall through to next case */ |
| case ufmt_char: |
| case ufmt_uchar: |
| case ufmt_int: |
| case ufmt_string: |
| case ufmt_ustring: |
| case ufmt_pointer: |
| case ufmt_float: |
| case ufmt_double: |
| args.ptrValue = va_arg(ap, void*); |
| break; |
| |
| default: |
| /* else args is ignored */ |
| args.ptrValue = NULL; |
| break; |
| } |
| } |
| |
| /* call the handler function */ |
| handler = g_u_scanf_infos[ handlerNum ].handler; |
| if(handler != 0) { |
| |
| /* reset count to 1 so that += for alias works. */ |
| count = 1; |
| |
| cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed); |
| |
| /* if the handler encountered an error condition, break */ |
| if(argConsumed < 0) { |
| converted = -1; |
| break; |
| } |
| |
| /* add to the # of items converted */ |
| converted += argConsumed; |
| |
| /* update the pointer in pattern */ |
| alias += count-1; |
| } |
| /* else do nothing */ |
| } |
| /* else do nothing */ |
| |
| /* just ignore unknown tags */ |
| } |
| |
| /* return # of items converted */ |
| return converted; |
| } |
| |
| #endif /* #if !UCONFIG_NO_FORMATTING */ |