src/third_party/icu/source/io/uscanf_p.c - cobalt - Git at Google

 /*
 *******************************************************************************
 *
 *   Copyright (C) 1998-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *
 * File uscanf_p.c
 *
 * Modification History:
 *
 *   Date        Name        Description
 *   12/02/98    stephen        Creation.
 *   03/13/99    stephen     Modified for new C API.
 *******************************************************************************
 */

 #include "unicode/utypes.h"

 #if !UCONFIG_NO_FORMATTING

 #include "unicode/uchar.h"
 #include "unicode/ustring.h"
 #include "unicode/unum.h"
 #include "unicode/udat.h"
 #include "unicode/uset.h"
 #include "uscanf.h"
 #include "ufmt_cmn.h"
 #include "ufile.h"
 #include "locbund.h"

 #include "cmemory.h"
 #include "ustr_cnv.h"

 /* flag characters for u_scanf */
 #define FLAG_ASTERISK 0x002A
 #define FLAG_PAREN 0x0028

 /* Non-zero if s is a u_scanf flag character ('*' or '(').
    The expansion is fully parenthesized so that it composes correctly with
    !, && and other operators at the point of use. */
 #define ISFLAG(s)    ((s) == FLAG_ASTERISK || \
             (s) == FLAG_PAREN)

 /* special characters for u_scanf */
 #define SPEC_DOLLARSIGN 0x0024

 /* unicode digits */
 #define DIGIT_ZERO 0x0030
 #define DIGIT_ONE 0x0031
 #define DIGIT_TWO 0x0032
 #define DIGIT_THREE 0x0033
 #define DIGIT_FOUR 0x0034
 #define DIGIT_FIVE 0x0035
 #define DIGIT_SIX 0x0036
 #define DIGIT_SEVEN 0x0037
 #define DIGIT_EIGHT 0x0038
 #define DIGIT_NINE 0x0039

 /* Non-zero if s is one of the ASCII digits U+0030..U+0039.
    The digit code points are contiguous, so a range test is equivalent to
    comparing against each constant.  The expansion is fully parenthesized so
    that it composes correctly with !, && and other operators. */
 #define ISDIGIT(s)    ((s) >= DIGIT_ZERO && \
             (s) <= DIGIT_NINE)

 /* u_scanf modifiers */
 #define MOD_H 0x0068
 #define MOD_LOWERL 0x006C
 #define MOD_L 0x004C

 /* Non-zero if s is a u_scanf size modifier ('h', 'l' or 'L').
    The expansion is fully parenthesized so that it composes correctly with
    !, && and other operators at the point of use. */
 #define ISMOD(s)    ((s) == MOD_H || \
             (s) == MOD_LOWERL || \
             (s) == MOD_L)

 /**
  * Struct encapsulating a single uscanf format specification.
  * It is filled in by u_scanf_parse_spec and handed to the handler
  * for the conversion named by fSpec.
  */
 typedef struct u_scanf_spec_info {
     int32_t fWidth;         /* Maximum field width; -1 when the spec gives none.
                                For the count ('n') handler this instead holds the
                                number of items converted so far. */

     UChar   fSpec;          /* Conversion specifier letter; 0x0000 until parsed */

     UChar   fPadChar;       /* Padding character (default U+0020); skipped like
                                whitespace before a field */

     UBool   fSkipArg;       /* TRUE if arg should be skipped ('*' flag) */
     UBool   fIsLongDouble;  /* L flag  */
     UBool   fIsShort;       /* h flag  */
     UBool   fIsLong;        /* l flag  */
     UBool   fIsLongLong;    /* ll flag  */
     UBool   fIsString;      /* TRUE (the default) if this is a NUL-terminated
                                string; the char handlers set it to FALSE. */
 } u_scanf_spec_info;


 /**
  * Struct encapsulating a single u_scanf format specification
  * together with its optional positional-argument index.
  */
 typedef struct u_scanf_spec {
     u_scanf_spec_info    fInfo;        /* Information on this spec */
     int32_t        fArgPos;    /* Position of data in arg list, as given by a
                                   "digits$" prefix; -1 when no position is given */
 } u_scanf_spec;

 /**
  * Parse a single u_scanf format specifier in Unicode.
  *
  * The accepted layout is:
  *   '%' [digits '$'] { '*' | '(' hhhh ')' } [width] [ 'h' | 'l' | 'll' | 'L' ] letter
  *
  * Parsing never advances past the terminating NUL of <TT>fmt</TT>.  A
  * truncated specifier (for example a lone trailing '%', or an unfinished
  * "%(" pad-character flag) stops at the NUL; fSpec is then 0x0000 and the
  * returned length does not include the NUL.
  *
  * @param fmt A pointer to a '%' character in a u_scanf format specification.
  * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
  * format specifier.
  * @return The number of characters contained in this specifier.
  */
 static int32_t
 u_scanf_parse_spec (const UChar     *fmt,
             u_scanf_spec    *spec)
 {
     const UChar *s = fmt;
     const UChar *backup;
     u_scanf_spec_info *info = &(spec->fInfo);
     UChar padValue;
     int32_t padDigits;

     /* initialize spec to default values */
     spec->fArgPos             = -1;

     info->fWidth        = -1;
     info->fSpec         = 0x0000;
     info->fPadChar      = 0x0020;
     info->fSkipArg      = FALSE;
     info->fIsLongDouble = FALSE;
     info->fIsShort      = FALSE;
     info->fIsLong       = FALSE;
     info->fIsLongLong   = FALSE;
     info->fIsString     = TRUE;


     /* skip over the initial '%' */
     s++;

     /* Check for positional argument ("digits$") */
     if(ISDIGIT(*s)) {

         /* Save the current position: the digits may turn out to be a width */
         backup = s;

         spec->fArgPos = (int) (*s++ - DIGIT_ZERO);

         while(ISDIGIT(*s)) {
             spec->fArgPos *= 10;
             spec->fArgPos += (int) (*s++ - DIGIT_ZERO);
         }

         /* if there is no '$', don't read anything */
         if(*s != SPEC_DOLLARSIGN) {
             spec->fArgPos = -1;
             s = backup;
         }
         /* munge the '$' */
         else
             s++;
     }

     /* Get any format flags */
     while(ISFLAG(*s)) {
         switch(*s++) {

             /* skip argument */
         case FLAG_ASTERISK:
             info->fSkipArg = TRUE;
             break;

             /* pad character specified */
         case FLAG_PAREN:

             /* first four characters are hex values for pad char; */
             /* stop early if the format string ends inside the flag */
             padValue = 0;
             for(padDigits = 0; padDigits < 4 && *s != 0x0000; ++padDigits) {
                 padValue = (UChar)((padValue * 16) + ufmt_digitvalue(*s++));
             }

             /* only a complete four-digit value replaces the pad character */
             if(padDigits == 4) {
                 info->fPadChar = padValue;
             }

             /* final character is ignored (but never step over the NUL) */
             if(*s != 0x0000) {
                 s++;
             }

             break;
         }
     }

     /* Get the width */
     if(ISDIGIT(*s)){
         info->fWidth = (int) (*s++ - DIGIT_ZERO);

         while(ISDIGIT(*s)) {
             info->fWidth *= 10;
             info->fWidth += (int) (*s++ - DIGIT_ZERO);
         }
     }

     /* Get any modifiers */
     if(ISMOD(*s)) {
         switch(*s++) {

             /* short */
         case MOD_H:
             info->fIsShort = TRUE;
             break;

             /* long or long long */
         case MOD_LOWERL:
             if(*s == MOD_LOWERL) {
                 info->fIsLongLong = TRUE;
                 /* skip over the next 'l' */
                 s++;
             }
             else
                 info->fIsLong = TRUE;
             break;

             /* long double */
         case MOD_L:
             info->fIsLongDouble = TRUE;
             break;
         }
     }

     /* finally, get the specifier letter; a NUL here means the format */
     /* string ended early, so leave s on the terminator */
     info->fSpec = *s;
     if(*s != 0x0000) {
         s++;
     }

     /* return # of characters in this specifier */
     return (int32_t)(s - fmt);
 }

 /* U+0025 PERCENT SIGN, the character that introduces a format specifier */
 #define UP_PERCENT 0x0025


 /* Each UFMT_* macro below expands to a brace initializer that pairs an */
 /* argument-type tag (ufmt_*) with the handler function for one conversion. */
 /* The two members match the layout of u_scanf_info; presumably these */
 /* initialize a handler table that is not visible in this section. */

 /* ANSI style formatting */
 /* Use US-ASCII characters only for formatting */

 /* % */
 #define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}
 /* s */
 #define UFMT_STRING         {ufmt_string, u_scanf_string_handler}
 /* c */
 #define UFMT_CHAR           {ufmt_string, u_scanf_char_handler}
 /* d, i */
 #define UFMT_INT            {ufmt_int, u_scanf_integer_handler}
 /* u */
 #define UFMT_UINT           {ufmt_int, u_scanf_uinteger_handler}
 /* o */
 #define UFMT_OCTAL          {ufmt_int, u_scanf_octal_handler}
 /* x, X */
 #define UFMT_HEX            {ufmt_int, u_scanf_hex_handler}
 /* f */
 #define UFMT_DOUBLE         {ufmt_double, u_scanf_double_handler}
 /* e, E */
 #define UFMT_SCIENTIFIC     {ufmt_double, u_scanf_scientific_handler}
 /* g, G */
 #define UFMT_SCIDBL         {ufmt_double, u_scanf_scidbl_handler}
 /* n */
 #define UFMT_COUNT          {ufmt_count, u_scanf_count_handler}
 /* [ */
 #define UFMT_SCANSET        {ufmt_string, u_scanf_scanset_handler}

 /* non-ANSI extensions */
 /* Use US-ASCII characters only for formatting */

 /* p */
 #define UFMT_POINTER        {ufmt_pointer, u_scanf_pointer_handler}
 /* V */
 #define UFMT_SPELLOUT       {ufmt_double, u_scanf_spellout_handler}
 /* P */
 #define UFMT_PERCENT        {ufmt_double, u_scanf_percent_handler}
 /* C  (K is the old format letter) */
 #define UFMT_UCHAR          {ufmt_uchar, u_scanf_uchar_handler}
 /* S  (U is the old format letter) */
 #define UFMT_USTRING        {ufmt_ustring, u_scanf_ustring_handler}


 /* entry with no handler (NULL handler pointer) */
 #define UFMT_EMPTY {ufmt_empty, NULL}

 /**
  * A u_scanf handler function.
  * A u_scanf handler is responsible for handling a single u_scanf
  * format specification, for example 'd' or 's'.
  * @param stream The UFILE from which input is read.
  * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
  * information on the format specification.
  * @param args A pointer to the argument data; handlers store their result
  * through <TT>args[0].ptrValue</TT> unless <TT>info->fSkipArg</TT> is set.
  * @param fmt A pointer to the first character in the format string
  * following the spec.
  * @param fmtConsumed On output, set to the number of characters consumed
  * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
  * @param argConverted The number of arguments converted and assigned, or -1 if an
  * error occurred.
  * @return The number of code points consumed during reading.
  */
 typedef int32_t (*u_scanf_handler) (UFILE   *stream,
                    u_scanf_spec_info  *info,
                    ufmt_args                *args,
                    const UChar              *fmt,
                    int32_t                  *fmtConsumed,
                    int32_t                  *argConverted);

 /* Pairs the argument type expected by a conversion with the function */
 /* that performs that conversion. */
 typedef struct u_scanf_info {
     ufmt_type_info info;        /* kind of argument the conversion stores into */
     u_scanf_handler handler;    /* function that reads and converts the field */
 } u_scanf_info;

 /* NOTE(review): presumably the number of entries in the handler table, */
 /* which is defined outside this section -- confirm against the table. */
 #define USCANF_NUM_FMT_HANDLERS 108
 /* Capacity, in UChars, of the buffer used to fetch a locale number symbol */
 #define USCANF_SYMBOL_BUFFER_SIZE 8

 /* We do not use handlers for 0-0x1f */
 #define USCANF_BASE_FMT_HANDLERS 0x20


 /**
  * Consume leading whitespace and pad characters from the input.
  * The first character that is neither is pushed back onto the input.
  * @param input The UFILE to read from.
  * @param pad The pad character, skipped in addition to whitespace.
  * @return The number of characters skipped.
  */
 static int32_t
 u_scanf_skip_leading_ws(UFILE   *input,
                         UChar   pad)
 {
     int32_t numSkipped = 0;
     UChar   ch;

     for(;;) {
         if(!ufile_getch(input, &ch)) {
             /* ran out of input: there is nothing to push back */
             return numSkipped;
         }

         if(ch != pad && !u_isWhitespace(ch)) {
             /* first character that belongs to the field itself */
             break;
         }

         ++numSkipped;
     }

     /* return the non-blank character to the input */
     u_fungetc(ch, input);

     return numSkipped;
 }

 /* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
 /**
  * Consume the formatter's plus-sign symbol if the input starts with it.
  * Nothing is read when <TT>*status</TT> already indicates a failure or when
  * the symbol cannot be fetched into the local buffer.
  * @param input The UFILE to read from.
  * @param format The number format whose plus-sign symbol is matched.
  * @param status Incoming error state; it is only inspected, never modified.
  * @return The number of UChars of the symbol that were consumed.
  */
 static int32_t
 u_scanf_skip_leading_positive_sign(UFILE   *input,
                                    UNumberFormat *format,
                                    UErrorCode *status)
 {
     UChar      plusSymbol[USCANF_SYMBOL_BUFFER_SIZE];
     UErrorCode localStatus = U_ZERO_ERROR;
     int32_t    symbolLen;
     int32_t    matched = 0;
     UChar      ch;

     if (U_FAILURE(*status)) {
         return 0;
     }

     symbolLen = unum_getSymbol(format,
         UNUM_PLUS_SIGN_SYMBOL,
         plusSymbol,
         sizeof(plusSymbol)/sizeof(*plusSymbol),
         &localStatus);

     if (U_FAILURE(localStatus)) {
         return 0;
     }

     /* consume input for as long as it keeps matching the plus symbol */
     for (;;) {
         if (!ufile_getch(input, &ch)) {
             /* end of input: nothing to push back */
             return matched;
         }

         if (!(matched < symbolLen && ch == plusSymbol[matched])) {
             break;
         }

         ++matched;
     }

     /* the character that ended the match goes back on the input */
     u_fungetc(ch, input);

     return matched;
 }

 /**
  * Handler for "%%": the next input character must be a literal percent
  * sign.  No argument is assigned.
  * @param argConverted Set to 0 on a match, or -1 if the character read
  * is not U+0025.
  * @return Always 1.
  */
 static int32_t
 u_scanf_simple_percent_handler(UFILE        *input,
                                u_scanf_spec_info *info,
                                ufmt_args    *args,
                                const UChar  *fmt,
                                int32_t      *fmtConsumed,
                                int32_t      *argConverted)
 {
     /* read one character and report a mismatch as an error */
     *argConverted = (u_fgetc(input) == 0x0025) ? 0 : -1;

     return 1;
 }

 /**
  * Handler for the 'n' conversion: stores the running item count.
  * For this conversion the width field of <TT>info</TT> carries the number
  * of items converted thus far rather than a field width.
  * The value is stored as int16_t with the h flag, as int64_t with the ll
  * flag, and as int32_t otherwise.  Nothing is stored when the argument is
  * skipped.
  * @param argConverted Always set to 0.
  * @return Always 0: no input is consumed.
  */
 static int32_t
 u_scanf_count_handler(UFILE         *input,
                       u_scanf_spec_info *info,
                       ufmt_args     *args,
                       const UChar   *fmt,
                       int32_t       *fmtConsumed,
                       int32_t       *argConverted)
 {
     void *dest;

     /* this conversion never counts as a converted argument */
     *argConverted = 0;

     if (info->fSkipArg) {
         return 0;
     }

     dest = args[0].ptrValue;

     if (info->fIsShort) {
         *(int16_t*)dest = (int16_t)(UINT16_MAX & info->fWidth);
     }
     else if (info->fIsLongLong) {
         *(int64_t*)dest = info->fWidth;
     }
     else {
         *(int32_t*)dest = (int32_t)(UINT32_MAX & info->fWidth);
     }

     return 0;
 }

 /**
  * Handler for the 'f' conversion: parses a decimal number with the
  * locale's UNUM_DECIMAL formatter.
  * The result is stored as double with the l flag, as long double with the
  * L flag, and as float otherwise.  Nothing is stored when the argument is
  * skipped.
  * @param argConverted Set to 1 if a value was assigned, 0 if it was skipped.
  * It is left untouched when no formatter is available.
  * @return The number of code points consumed (skipped characters plus the
  * parsed text), or 0 when no formatter is available.
  */
 static int32_t
 u_scanf_double_handler(UFILE        *input,
                        u_scanf_spec_info *info,
                        ufmt_args    *args,
                        const UChar  *fmt,
                        int32_t      *fmtConsumed,
                        int32_t      *argConverted)
 {
     int32_t         len;
     double          num;
     UNumberFormat   *format;
     int32_t         parsePos    = 0;
     int32_t         skipped;
     UErrorCode      status      = U_ZERO_ERROR;


     /* skip all ws in the input */
     skipped = u_scanf_skip_leading_ws(input, info->fPadChar);

     /* fill the input's internal buffer */
     ufile_fill_uchar_buffer(input);

     /* get the formatter */
     format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);

     /* handle error */
     if(format == 0)
         return 0;

     /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
     skipped += u_scanf_skip_leading_positive_sign(input, format, &status);

     /* determine the size of the input's buffer.  This must be measured */
     /* after the sign has been consumed: skipping it advances fPos, and a */
     /* length taken earlier would reach past fLimit. */
     len = (int32_t)(input->str.fLimit - input->str.fPos);

     /* truncate to the width, if specified */
     if(info->fWidth != -1)
         len = ufmt_min(len, info->fWidth);

     /* parse the number */
     num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);

     if (!info->fSkipArg) {
         if (info->fIsLong)
             *(double*)(args[0].ptrValue) = num;
         else if (info->fIsLongDouble)
             *(long double*)(args[0].ptrValue) = num;
         else
             *(float*)(args[0].ptrValue) = (float)num;
     }

     /* update the input's position to reflect consumed data */
     input->str.fPos += parsePos;

     /* we converted 1 arg, unless it was skipped */
     *argConverted = !info->fSkipArg;
     return parsePos + skipped;
 }

 #define UPRINTF_SYMBOL_BUFFER_SIZE 8

 /**
  * Handler for the 'e' and 'E' conversions: parses a number with the
  * locale's UNUM_SCIENTIFIC formatter.
  * Before parsing, the formatter's exponent symbol is lower-cased for 'e'
  * and upper-cased for any other specifier, and set back on the formatter.
  * The result is stored as double with the l flag, as long double with the
  * L flag, and as float otherwise.  Nothing is stored when the argument is
  * skipped.
  * @param argConverted Set to 1 if a value was assigned, 0 if it was skipped.
  * It is left untouched when no formatter is available.
  * @return The number of code points consumed (skipped characters plus the
  * parsed text), or 0 when no formatter is available.
  */
 static int32_t
 u_scanf_scientific_handler(UFILE        *input,
                            u_scanf_spec_info *info,
                            ufmt_args    *args,
                            const UChar  *fmt,
                            int32_t      *fmtConsumed,
                            int32_t      *argConverted)
 {
     int32_t         len;
     double          num;
     UNumberFormat   *format;
     int32_t         parsePos    = 0;
     int32_t         skipped;
     UErrorCode      status      = U_ZERO_ERROR;
     UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
     int32_t srcLen, expLen;
     UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
     /* buffer capacities in UChars (not bytes), as the ICU APIs expect */
     const int32_t srcExpCapacity = (int32_t)(sizeof(srcExpBuf)/sizeof(srcExpBuf[0]));
     const int32_t expCapacity = (int32_t)(sizeof(expBuf)/sizeof(expBuf[0]));


     /* skip all ws in the input */
     skipped = u_scanf_skip_leading_ws(input, info->fPadChar);

     /* fill the input's internal buffer */
     ufile_fill_uchar_buffer(input);

     /* get the formatter */
     format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);

     /* handle error */
     if(format == 0)
         return 0;

     /* set the appropriate flags on the formatter */

     srcLen = unum_getSymbol(format,
         UNUM_EXPONENTIAL_SYMBOL,
         srcExpBuf,
         srcExpCapacity,
         &status);

     /* Upper/lower case the e */
     if (info->fSpec == (UChar)0x65 /* e */) {
         expLen = u_strToLower(expBuf, expCapacity,
             srcExpBuf, srcLen,
             input->str.fBundle.fLocale,
             &status);
     }
     else {
         expLen = u_strToUpper(expBuf, expCapacity,
             srcExpBuf, srcLen,
             input->str.fBundle.fLocale,
             &status);
     }

     unum_setSymbol(format,
         UNUM_EXPONENTIAL_SYMBOL,
         expBuf,
         expLen,
         &status);


     /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
     skipped += u_scanf_skip_leading_positive_sign(input, format, &status);

     /* determine the size of the input's buffer.  This must be measured */
     /* after the sign has been consumed: skipping it advances fPos, and a */
     /* length taken earlier would reach past fLimit. */
     len = (int32_t)(input->str.fLimit - input->str.fPos);

     /* truncate to the width, if specified */
     if(info->fWidth != -1)
         len = ufmt_min(len, info->fWidth);

     /* parse the number */
     num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);

     if (!info->fSkipArg) {
         if (info->fIsLong)
             *(double*)(args[0].ptrValue) = num;
         else if (info->fIsLongDouble)
             *(long double*)(args[0].ptrValue) = num;
         else
             *(float*)(args[0].ptrValue) = (float)num;
     }

     /* update the input's position to reflect consumed data */
     input->str.fPos += parsePos;

     /* we converted 1 arg, unless it was skipped */
     *argConverted = !info->fSkipArg;
     return parsePos + skipped;
 }

 /**
  * Handler for the 'g' and 'G' conversions.
  * Since we can't determine by scanning the characters whether a number was
  * formatted in the 'f' or 'e' style, the input is parsed with both the
  * UNUM_SCIENTIFIC and the UNUM_DECIMAL formatter, and whichever one
  * consumed more text wins (the decimal result wins a tie).
  * The result is stored as double with the l flag, as long double with the
  * L flag, and as float otherwise.  Nothing is stored when the argument is
  * skipped.
  * @param argConverted Set to 1 if a value was assigned, 0 if it was skipped.
  * It is left untouched when a formatter is unavailable.
  * @return The number of code points consumed (skipped characters plus the
  * parsed text), or 0 when a formatter is unavailable.
  */
 static int32_t
 u_scanf_scidbl_handler(UFILE        *input,
                        u_scanf_spec_info *info,
                        ufmt_args    *args,
                        const UChar  *fmt,
                        int32_t      *fmtConsumed,
                        int32_t      *argConverted)
 {
     int32_t       len;
     double        num;
     UNumberFormat *scientificFormat, *genericFormat;
     double        scientificResult, genericResult;
     int32_t       scientificParsePos = 0, genericParsePos = 0, parsePos = 0;
     int32_t       skipped;
     UErrorCode    scientificStatus = U_ZERO_ERROR;
     UErrorCode    genericStatus = U_ZERO_ERROR;


     /* skip all ws in the input */
     skipped = u_scanf_skip_leading_ws(input, info->fPadChar);

     /* fill the input's internal buffer */
     ufile_fill_uchar_buffer(input);

     /* get the formatters */
     scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
     genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);

     /* handle error */
     if(scientificFormat == 0 || genericFormat == 0)
         return 0;

     /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
     skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus);

     /* determine the size of the input's buffer.  This must be measured */
     /* after the sign has been consumed: skipping it advances fPos, and a */
     /* length taken earlier would reach past fLimit. */
     len = (int32_t)(input->str.fLimit - input->str.fPos);

     /* truncate to the width, if specified */
     if(info->fWidth != -1)
         len = ufmt_min(len, info->fWidth);

     /* parse the number using each format*/

     scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len,
         &scientificParsePos, &scientificStatus);

     genericResult = unum_parseDouble(genericFormat, input->str.fPos, len,
         &genericParsePos, &genericStatus);

     /* determine which parse made it farther */
     if(scientificParsePos > genericParsePos) {
         num = scientificResult;
         parsePos = scientificParsePos;
     }
     else {
         num = genericResult;
         parsePos = genericParsePos;
     }

     /* update the input's position to reflect consumed data */
     input->str.fPos += parsePos;

     if (!info->fSkipArg) {
         if (info->fIsLong)
             *(double*)(args[0].ptrValue) = num;
         else if (info->fIsLongDouble)
             *(long double*)(args[0].ptrValue) = num;
         else
             *(float*)(args[0].ptrValue) = (float)num;
     }

     /* we converted 1 arg, unless it was skipped */
     *argConverted = !info->fSkipArg;
     return parsePos + skipped;
 }

 /**
  * Handler for the 'd' and 'i' conversions: parses an integer with the
  * locale's UNUM_DECIMAL formatter.
  * The result is stored as int16_t with the h flag, as int64_t with the ll
  * flag, and as int32_t otherwise; narrower stores keep only the low bits.
  * Nothing is stored when the argument is skipped.
  * @param argConverted Set to 1 if a value was assigned, 0 if it was skipped.
  * It is left untouched when no formatter is available.
  * @return The number of code points consumed (skipped characters plus the
  * parsed text), or 0 when no formatter is available.
  */
 static int32_t
 u_scanf_integer_handler(UFILE       *input,
                         u_scanf_spec_info *info,
                         ufmt_args   *args,
                         const UChar *fmt,
                         int32_t     *fmtConsumed,
                         int32_t     *argConverted)
 {
     int32_t         len;
     void            *num        = (void*) (args[0].ptrValue);
     UNumberFormat   *format;
     int32_t         parsePos    = 0;
     int32_t         skipped;
     UErrorCode      status      = U_ZERO_ERROR;
     int64_t         result;


     /* skip all ws in the input */
     skipped = u_scanf_skip_leading_ws(input, info->fPadChar);

     /* fill the input's internal buffer */
     ufile_fill_uchar_buffer(input);

     /* get the formatter */
     format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);

     /* handle error */
     if(format == 0)
         return 0;

     /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
     skipped += u_scanf_skip_leading_positive_sign(input, format, &status);

     /* determine the size of the input's buffer.  This must be measured */
     /* after the sign has been consumed: skipping it advances fPos, and a */
     /* length taken earlier would reach past fLimit. */
     len = (int32_t)(input->str.fLimit - input->str.fPos);

     /* truncate to the width, if specified */
     if(info->fWidth != -1)
         len = ufmt_min(len, info->fWidth);

     /* parse the number */
     result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status);

     /* mask off any necessary bits */
     if (!info->fSkipArg) {
         if (info->fIsShort)
             *(int16_t*)num = (int16_t)(UINT16_MAX & result);
         else if (info->fIsLongLong)
             *(int64_t*)num = result;
         else
             *(int32_t*)num = (int32_t)(UINT32_MAX & result);
     }

     /* update the input's position to reflect consumed data */
     input->str.fPos += parsePos;

     /* we converted 1 arg, unless it was skipped */
     *argConverted = !info->fSkipArg;
     return parsePos + skipped;
 }

 /**
  * Handler for the 'u' conversion.
  * Currently forwards every argument unchanged to u_scanf_integer_handler,
  * so unsigned input is parsed exactly like signed input.
  * @return Whatever u_scanf_integer_handler returns.
  */
 static int32_t
 u_scanf_uinteger_handler(UFILE          *input,
                          u_scanf_spec_info *info,
                          ufmt_args      *args,
                          const UChar    *fmt,
                          int32_t        *fmtConsumed,
                          int32_t        *argConverted)
 {
     /* TODO Fix this when Numberformat handles uint64_t */
     return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted);
 }

 /**
  * Handler for the 'P' conversion: parses a percentage with the locale's
  * UNUM_PERCENT formatter.
  * The result is always stored as double; the size modifiers are not
  * consulted.  Nothing is stored when the argument is skipped.
  * @param argConverted Set to 1 if a value was assigned, 0 if it was skipped.
  * It is left untouched when no formatter is available.
  * @return The number of code points consumed (skipped characters plus the
  * parsed text), or 0 when no formatter is available.
  */
 static int32_t
 u_scanf_percent_handler(UFILE       *input,
                         u_scanf_spec_info *info,
                         ufmt_args   *args,
                         const UChar *fmt,
                         int32_t     *fmtConsumed,
                         int32_t     *argConverted)
 {
     int32_t         len;
     double          num;
     UNumberFormat   *format;
     int32_t         parsePos    = 0;
     int32_t         skipped;
     UErrorCode      status      = U_ZERO_ERROR;


     /* skip all ws in the input */
     skipped = u_scanf_skip_leading_ws(input, info->fPadChar);

     /* fill the input's internal buffer */
     ufile_fill_uchar_buffer(input);

     /* get the formatter */
     format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT);

     /* handle error */
     if(format == 0)
         return 0;

     /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
     skipped += u_scanf_skip_leading_positive_sign(input, format, &status);

     /* determine the size of the input's buffer.  This must be measured */
     /* after the sign has been consumed: skipping it advances fPos, and a */
     /* length taken earlier would reach past fLimit. */
     len = (int32_t)(input->str.fLimit - input->str.fPos);

     /* truncate to the width, if specified */
     if(info->fWidth != -1)
         len = ufmt_min(len, info->fWidth);

     /* parse the number */
     num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);

     if (!info->fSkipArg) {
         *(double*)(args[0].ptrValue) = num;
     }

     /* update the input's position to reflect consumed data */
     input->str.fPos += parsePos;

     /* we converted 1 arg, unless it was skipped */
     *argConverted = !info->fSkipArg;

     /* the skipped whitespace and sign were consumed too, so report them */
     /* just as the other numeric handlers do */
     return parsePos + skipped;
 }

 /**
  * Handler for the 's' conversion (and, through u_scanf_char_handler, 'c'):
  * reads UChars from the input, converts each one to the default codepage
  * and stores the bytes through the caller's char pointer.
  *
  * When <TT>info->fIsString</TT> is set, leading whitespace is skipped,
  * reading stops at the first whitespace or pad character, and a NUL
  * terminator is appended.  Otherwise exactly the requested width is read
  * and no terminator is written.
  *
  * The character that ends the field is pushed back onto the input even when
  * the argument is skipped with '*', so a skipped field consumes the same
  * input as an assigned one.
  *
  * @param argConverted Set to 1 if a value was assigned, 0 if it was skipped.
  * It is left untouched on the error paths.
  * @return The number of UChars consumed (skipped whitespace plus field
  * characters), or -1 if the converter could not be opened or a conversion
  * failed.
  */
 static int32_t
 u_scanf_string_handler(UFILE        *input,
                        u_scanf_spec_info *info,
                        ufmt_args    *args,
                        const UChar  *fmt,
                        int32_t      *fmtConsumed,
                        int32_t      *argConverted)
 {
     const UChar *source;
     UConverter  *conv;
     char        *alias  = (char*)(args[0].ptrValue);
     char        *limit;
     UErrorCode  status  = U_ZERO_ERROR;
     int32_t     count;
     int32_t     skipped = 0;
     UChar       c;
     UBool       isNotEOF = FALSE;

     /* skip all ws in the input */
     if (info->fIsString) {
         skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
     }

     /* get the string one character at a time, truncating to the width */
     count = 0;

     /* open the default converter */
     conv = u_getDefaultConverter(&status);

     if(U_FAILURE(status))
         return -1;

     while( (info->fWidth == -1 || count < info->fWidth)
         && (isNotEOF = ufile_getch(input, &c))
         && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
     {

         if (!info->fSkipArg) {
             /* put the character from the input onto the target */
             source = &c;
             /* Since we do this one character at a time, do it this way. */
             if (info->fWidth > 0) {
                 limit = alias + info->fWidth - count;
             }
             else {
                 limit = alias + ucnv_getMaxCharSize(conv);
             }

             /* convert the character to the default codepage */
             ucnv_fromUnicode(conv, &alias, limit, &source, source + 1,
                 NULL, TRUE, &status);

             if(U_FAILURE(status)) {
                 /* clean up */
                 u_releaseDefaultConverter(conv);
                 return -1;
             }
         }

         /* increment the count */
         ++count;
     }

     /* put the final character we read back on the input.  This only */
     /* applies when the loop stopped on a delimiter (not on the width or */
     /* on end of input), and it must happen whether or not the argument */
     /* is being assigned. */
     if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF)
         u_fungetc(c, input);

     /* add the terminator */
     if (!info->fSkipArg && info->fIsString) {
         *alias = 0x00;
     }

     /* clean up */
     u_releaseDefaultConverter(conv);

     /* we converted 1 arg, unless it was skipped */
     *argConverted = !info->fSkipArg;
     return count + skipped;
 }

 /**
  * Handler for the 'c' conversion.
  * Marks the field as a non-string and defaults the width to one character
  * when none was given, then delegates to u_scanf_string_handler.
  * @return Whatever u_scanf_string_handler returns.
  */
 static int32_t
 u_scanf_char_handler(UFILE          *input,
                      u_scanf_spec_info *info,
                      ufmt_args      *args,
                      const UChar    *fmt,
                      int32_t        *fmtConsumed,
                      int32_t        *argConverted)
 {
     /* a character field is not treated as a NUL-terminated string */
     info->fIsString = FALSE;

     /* without an explicit width, read a single character */
     if (info->fWidth < 0) {
         info->fWidth = 1;
     }

     return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted);
 }

 /**
  * Handler for the 'S' conversion (and, through u_scanf_uchar_handler, 'C'):
  * copies UChars from the input through the caller's UChar pointer.
  *
  * When <TT>info->fIsString</TT> is set, leading whitespace is skipped,
  * reading stops at the first whitespace or pad character, and a NUL
  * terminator is appended.  Otherwise exactly the requested width is read
  * and no terminator is written.
  *
  * The character that ends the field is pushed back onto the input even when
  * the argument is skipped with '*', so a skipped field consumes the same
  * input as an assigned one.
  *
  * @param argConverted Set to 1 if a value was assigned, 0 if it was skipped.
  * @return The number of UChars consumed (skipped whitespace plus field
  * characters).
  */
 static int32_t
 u_scanf_ustring_handler(UFILE       *input,
                         u_scanf_spec_info *info,
                         ufmt_args   *args,
                         const UChar *fmt,
                         int32_t     *fmtConsumed,
                         int32_t     *argConverted)
 {
     UChar   *alias     = (UChar*)(args[0].ptrValue);
     int32_t count;
     int32_t skipped = 0;
     UChar   c;
     UBool   isNotEOF = FALSE;

     /* skip all ws in the input */
     if (info->fIsString) {
         skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
     }

     /* get the string one character at a time, truncating to the width */
     count = 0;

     while( (info->fWidth == -1 || count < info->fWidth)
         && (isNotEOF = ufile_getch(input, &c))
         && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
     {

         /* put the character from the input onto the target */
         if (!info->fSkipArg) {
             *alias++ = c;
         }

         /* increment the count */
         ++count;
     }

     /* put the final character we read back on the input.  This only */
     /* applies when the loop stopped on a delimiter (not on the width or */
     /* on end of input), and it must happen whether or not the argument */
     /* is being assigned. */
     if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) {
         u_fungetc(c, input);
     }

     /* add the terminator */
     if (!info->fSkipArg && info->fIsString) {
         *alias = 0x0000;
     }

     /* we converted 1 arg, unless it was skipped */
     *argConverted = !info->fSkipArg;
     return count + skipped;
 }

 /**
  * Handler for the 'C' conversion.
  * Marks the field as a non-string and defaults the width to one UChar
  * when none was given, then delegates to u_scanf_ustring_handler.
  * @return Whatever u_scanf_ustring_handler returns.
  */
 static int32_t
 u_scanf_uchar_handler(UFILE         *input,
                       u_scanf_spec_info *info,
                       ufmt_args     *args,
                       const UChar   *fmt,
                       int32_t       *fmtConsumed,
                       int32_t       *argConverted)
 {
     /* a character field is not treated as a NUL-terminated string */
     info->fIsString = FALSE;

     /* without an explicit width, read a single UChar */
     if (info->fWidth < 0) {
         info->fWidth = 1;
     }

     return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted);
 }

 /**
  * Handler for the 'V' conversion: parses a spelled-out number with the
  * locale's UNUM_SPELLOUT formatter.
  * The result is always stored as double.  Nothing is stored when the
  * argument is skipped.  Unlike the other numeric handlers, no leading
  * positive sign is skipped, since that is not applicable to RBNF.
  * @param argConverted Set to 1 if a value was assigned, 0 if it was skipped.
  * It is left untouched when no formatter is available.
  * @return The number of code points consumed (skipped whitespace plus the
  * parsed text), or 0 when no formatter is available.
  */
 static int32_t
 u_scanf_spellout_handler(UFILE          *input,
                          u_scanf_spec_info *info,
                          ufmt_args      *args,
                          const UChar    *fmt,
                          int32_t        *fmtConsumed,
                          int32_t        *argConverted)
 {
     UNumberFormat   *spellout;
     UErrorCode      errorCode   = U_ZERO_ERROR;
     double          value;
     int32_t         available;
     int32_t         consumed    = 0;
     int32_t         wsCount;

     /* drop leading whitespace, then make sure the buffer holds data */
     wsCount = u_scanf_skip_leading_ws(input, info->fPadChar);
     ufile_fill_uchar_buffer(input);

     /* the parse window is what is buffered, capped by the field width */
     available = (int32_t)(input->str.fLimit - input->str.fPos);
     if (info->fWidth != -1) {
         available = ufmt_min(available, info->fWidth);
     }

     spellout = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT);
     if (spellout == 0) {
         /* no formatter: nothing can be parsed */
         return 0;
     }

     value = unum_parseDouble(spellout, input->str.fPos, available, &consumed, &errorCode);

     if (!info->fSkipArg) {
         *(double*)(args[0].ptrValue) = value;
     }

     /* advance past the text the parser accepted */
     input->str.fPos += consumed;

     /* one argument converted, unless it was skipped */
     *argConverted = !info->fSkipArg;
     return consumed + wsCount;
 }

 /**
  * Handler for the 'x' and 'X' conversions: parses a hexadecimal integer,
  * with an optional "0x" or "0X" prefix.
  * The result is stored as int16_t with the h flag, as int64_t with the ll
  * flag, and as int32_t otherwise; narrower stores keep only the low bits.
  * Nothing is stored when the argument is skipped.
  *
  * The prefix is only recognized when at least two characters are available
  * within the field, so the test never reads beyond the parse window, and
  * the two prefix characters are included in the returned count.
  *
  * @param argConverted Set to 1 if a value was assigned, 0 if it was skipped.
  * @return The number of code points consumed (skipped whitespace, prefix
  * and digits).
  */
 static int32_t
 u_scanf_hex_handler(UFILE       *input,
                     u_scanf_spec_info *info,
                     ufmt_args   *args,
                     const UChar *fmt,
                     int32_t     *fmtConsumed,
                     int32_t     *argConverted)
 {
     int32_t     len;
     int32_t     skipped;
     void        *num    = (void*) (args[0].ptrValue);
     int64_t     result;

     /* skip all ws in the input */
     skipped = u_scanf_skip_leading_ws(input, info->fPadChar);

     /* fill the input's internal buffer */
     ufile_fill_uchar_buffer(input);

     /* determine the size of the input's buffer */
     len = (int32_t)(input->str.fLimit - input->str.fPos);

     /* truncate to the width, if specified */
     if(info->fWidth != -1)
         len = ufmt_min(len, info->fWidth);

     /* check for alternate form; both characters must lie inside the window */
     if( len >= 2 &&
         *(input->str.fPos) == 0x0030 &&
         (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) {

         /* skip the '0' and 'x' or 'X' if present */
         input->str.fPos += 2;
         len -= 2;

         /* the prefix counts as consumed input */
         skipped += 2;
     }

     /* parse the number */
     result = ufmt_uto64(input->str.fPos, &len, 16);

     /* update the input's position to reflect consumed data */
     input->str.fPos += len;

     /* mask off any necessary bits */
     if (!info->fSkipArg) {
         if (info->fIsShort)
             *(int16_t*)num = (int16_t)(UINT16_MAX & result);
         else if (info->fIsLongLong)
             *(int64_t*)num = result;
         else
             *(int32_t*)num = (int32_t)(UINT32_MAX & result);
     }

     /* we converted 1 arg, unless it was skipped */
     *argConverted = !info->fSkipArg;
     return len + skipped;
 }

 /*
  * Handler for octal integer conversions.
  *
  * Skips leading whitespace, refills the UChar buffer, and parses digits
  * in radix 8 with ufmt_uto64, limited to info->fWidth code units when a
  * width was given (fWidth != -1).
  *
  * Unless info->fSkipArg is set, the value is stored through
  * args[0].ptrValue as int16_t (fIsShort), int64_t (fIsLongLong) or
  * int32_t.  *argConverted is set to 1 when a value was stored and 0
  * when the argument was skipped.  fmt and fmtConsumed are unused.
  *
  * Returns the number of code units consumed (whitespace + digits).
  */
 static int32_t
 u_scanf_octal_handler(UFILE         *input,
                       u_scanf_spec_info *info,
                       ufmt_args     *args,
                       const UChar   *fmt,
                       int32_t       *fmtConsumed,
                       int32_t       *argConverted)
 {
     void        *dest = (void*) (args[0].ptrValue);
     int32_t     wsCount;
     int32_t     avail;
     int64_t     parsed;

     /* drop leading whitespace, then make sure the buffer has data */
     wsCount = u_scanf_skip_leading_ws(input, info->fPadChar);
     ufile_fill_uchar_buffer(input);

     /* code units available, capped by the field width when present */
     avail = (int32_t)(input->str.fLimit - input->str.fPos);
     if(info->fWidth != -1) {
         avail = ufmt_min(avail, info->fWidth);
     }

     /* convert; avail comes back as the count to advance past */
     parsed = ufmt_uto64(input->str.fPos, &avail, 8);
     input->str.fPos += avail;

     /* suppressed assignment: nothing is stored, nothing is counted */
     if (info->fSkipArg) {
         *argConverted = 0;
         return avail + wsCount;
     }

     /* store using the width selected by the length modifier */
     if (info->fIsShort) {
         *(int16_t*)dest = (int16_t)(UINT16_MAX & parsed);
     }
     else if (info->fIsLongLong) {
         *(int64_t*)dest = parsed;
     }
     else {
         *(int32_t*)dest = (int32_t)(UINT32_MAX & parsed);
     }

     *argConverted = 1;
     return avail + wsCount;
 }

 /*
  * Handler for pointer conversions.
  *
  * Skips leading whitespace, refills the UChar buffer, and converts the
  * text with ufmt_utop.  The amount offered to the parser is limited by
  * the field width (when fWidth != -1) and by sizeof(void*)*2 code units.
  *
  * Unless info->fSkipArg is set, the result is stored through
  * args[0].ptrValue (treated as void**).  *argConverted is set to 1 when
  * a value was stored and 0 when the argument was skipped.  fmt and
  * fmtConsumed are unused.
  *
  * Returns the number of code units consumed (whitespace + parsed text).
  */
 static int32_t
 u_scanf_pointer_handler(UFILE       *input,
                         u_scanf_spec_info *info,
                         ufmt_args   *args,
                         const UChar *fmt,
                         int32_t     *fmtConsumed,
                         int32_t     *argConverted)
 {
     const int32_t maxUnits = (int32_t)(sizeof(void*)*2);
     void    **dest  = (void**)(args[0].ptrValue);
     void    *value;
     int32_t wsCount;
     int32_t avail;

     /* drop leading whitespace, then make sure the buffer has data */
     wsCount = u_scanf_skip_leading_ws(input, info->fPadChar);
     ufile_fill_uchar_buffer(input);

     /* code units available, capped by the field width when present */
     avail = (int32_t)(input->str.fLimit - input->str.fPos);
     if(info->fWidth != -1) {
         avail = ufmt_min(avail, info->fWidth);
     }

     /* never hand the parser more than a pointer's worth of text */
     avail = ufmt_min(avail, maxUnits);

     /* convert into a temporary; avail comes back as the count consumed */
     value = ufmt_utop(input->str.fPos, &avail);

     if (info->fSkipArg) {
         *argConverted = 0;
     }
     else {
         *dest = value;
         *argConverted = 1;
     }

     /* advance past what the parser used */
     input->str.fPos += avail;

     return avail + wsCount;
 }

 /*
  * Handler for scanset conversions ("%[...]").
  *
  * The set is parsed from the format string with uset_applyPattern,
  * starting at the '[' that immediately precedes fmt.  Characters are
  * then read one code point at a time with ufile_getch32 while they are
  * contained in the set and the width budget (info->fWidth, or unlimited
  * when negative) is not exhausted.  The first non-matching character is
  * pushed back with u_fungetc.
  *
  * input        - stream to read from.
  * info         - parsed spec; fWidth and fSkipArg are consulted.
  * args         - args[0].ptrValue is the UChar destination buffer; the
  *                matched text plus a terminating 0 is written unless
  *                fSkipArg is set.
  * fmt          - points just past the '[' of the scanset in the format.
  * fmtConsumed  - receives the value returned by uset_applyPattern.
  * argConverted - receives 1 if text was stored, 0 if the argument was
  *                skipped or nothing matched.
  *
  * Returns the number of UTF-16 code units matched, or -1 if no
  * character matched.
  */
 static int32_t
 u_scanf_scanset_handler(UFILE       *input,
                         u_scanf_spec_info *info,
                         ufmt_args   *args,
                         const UChar *fmt,
                         int32_t     *fmtConsumed,
                         int32_t     *argConverted)
 {
     USet        *scanset;
     UErrorCode  status = U_ZERO_ERROR;
     int32_t     chLeft = INT32_MAX;
     UChar32     c;
     UChar       *alias = (UChar*) (args[0].ptrValue);
     UBool       isNotEOF = FALSE;
     UBool       readCharacter = FALSE;

     /* Create an empty set */
     scanset = uset_open(0, -1);

     /* Back up one to get the [ */
     fmt--;

     /* truncate to the width, if specified and alias the target */
     if(info->fWidth >= 0) {
         chLeft = info->fWidth;
     }

     /* parse the scanset from the fmt string */
     *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);

     /* verify that the parse was successful */
     if (U_SUCCESS(status)) {
         c=0;

         /* grab characters one at a time and make sure they are in the scanset */
         while(chLeft > 0) {
             if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) {
                 readCharacter = TRUE;
                 if (!info->fSkipArg) {
                     int32_t idx = 0;
                     UBool isError = FALSE;

                     /* chLeft doubles as the remaining capacity, so a
                        supplementary code point that does not fit in the
                        width is rejected rather than split */
                     U16_APPEND(alias, idx, chLeft, c, isError);
                     if (isError) {
                         break;
                     }
                     alias += idx;
                 }
                 chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
             }
             else {
                 /* if the character's not in the scanset, break out */
                 break;
             }
         }

         /* put the final character we read back on the input */
         if(isNotEOF && chLeft > 0) {
             u_fungetc(c, input);
         }
     }

     uset_close(scanset);

     /* if we didn't match at least 1 character, fail */
     if(!readCharacter) {
         /* report explicitly that nothing was converted, so the caller
            never sees a stale count left over from an earlier handler */
         *argConverted = 0;
         return -1;
     }
     /* otherwise, add the terminator */
     else if (!info->fSkipArg) {
         *alias = 0x00;
     }

     /* we converted 1 arg */
     *argConverted = !info->fSkipArg;
     return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
 }

 /* Dispatch table for u_scanf conversion characters.

  u_scanf_parse indexes this table with (spec character -
  USCANF_BASE_FMT_HANDLERS) and uses each entry's .info and .handler
  members.  The row labels below give the character code of the first
  entry of each group of 16; they presumably correspond to
  USCANF_BASE_FMT_HANDLERS == 0x20 (defined outside this view - confirm).
  Entries are laid out four per line, so the position of an entry must
  not change.

  Use US-ASCII characters only for formatting. Most codepages have
  characters 20-7F from Unicode. Using any other codepage specific
  characters will make it very difficult to format the string on
  non-Unicode machines */
 static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = {
 /* 0x20: only '%' (0x25) is populated */
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
     UFMT_EMPTY,         UFMT_SIMPLE_PERCENT,UFMT_EMPTY,         UFMT_EMPTY,
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,

 /* 0x30: digits and punctuation, no handlers */
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,

 /* 0x40: 'C' (0x43), 'E' (0x45), 'G' (0x47); 'K' (0x4B) only with
    U_USE_OBSOLETE_IO_FORMATTING */
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_UCHAR,
     UFMT_EMPTY,         UFMT_SCIENTIFIC,    UFMT_EMPTY,         UFMT_SCIDBL,
 #ifdef U_USE_OBSOLETE_IO_FORMATTING
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_UCHAR/*deprecated*/,
 #else
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
 #endif
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,

 /* 0x50: 'P' (0x50), 'S' (0x53), 'V' (0x56), 'X' (0x58), '[' (0x5B);
    'U' (0x55) only with U_USE_OBSOLETE_IO_FORMATTING */
     UFMT_PERCENT,       UFMT_EMPTY,         UFMT_EMPTY,         UFMT_USTRING,
 #ifdef U_USE_OBSOLETE_IO_FORMATTING
     UFMT_EMPTY,         UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT,      UFMT_EMPTY,
 #else
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_SPELLOUT,      UFMT_EMPTY,
 #endif
     UFMT_HEX,           UFMT_EMPTY,         UFMT_EMPTY,         UFMT_SCANSET,
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,

 /* 0x60: 'c' (0x63), 'd' (0x64), 'e' (0x65), 'f' (0x66), 'g' (0x67),
    'i' (0x69), 'n' (0x6E), 'o' (0x6F) */
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_CHAR,
     UFMT_INT,           UFMT_SCIENTIFIC,    UFMT_DOUBLE,        UFMT_SCIDBL,
     UFMT_EMPTY,         UFMT_INT,           UFMT_EMPTY,         UFMT_EMPTY,
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_COUNT,         UFMT_OCTAL,

 /* 0x70: 'p' (0x70), 's' (0x73), 'u' (0x75), 'x' (0x78) */
     UFMT_POINTER,       UFMT_EMPTY,         UFMT_EMPTY,         UFMT_STRING,
     UFMT_EMPTY,         UFMT_UINT,          UFMT_EMPTY,         UFMT_EMPTY,
     UFMT_HEX,           UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
 };

 /*
  * Core of the u_scanf family: walks the pattern, matching literal
  * characters against the input and dispatching each '%' specifier to
  * its handler in g_u_scanf_infos.
  *
  * f                    - stream to read from.
  * patternSpecification - 0-terminated format pattern.
  * ap                   - destination pointers, one per non-suppressed
  *                        specifier whose info type takes an argument.
  *
  * Processing stops at the end of the pattern, at the first literal
  * character that does not match the input, at end of input (except for
  * the count specifier), or when a handler reports a failure.
  *
  * Returns the number of items converted, or -1 if a handler reported a
  * negative conversion count.
  */
 U_CFUNC int32_t
 u_scanf_parse(UFILE     *f,
             const UChar *patternSpecification,
             va_list     ap)
 {
     const UChar     *alias;
     int32_t         count, converted, argConsumed, cpConsumed, consumed;
     uint16_t        handlerNum;

     ufmt_args       args;
     u_scanf_spec    spec;
     ufmt_type_info  info;
     u_scanf_handler handler;

     /* alias the pattern */
     alias = patternSpecification;

     /* haven't converted anything yet */
     argConsumed = 0;
     converted = 0;
     cpConsumed = 0;

     /* iterate through the pattern */
     for(;;) {

         /* match any characters up to the next '%' */
         while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) {
             alias++;
         }

         /* if we aren't at a '%' (end of pattern or a literal mismatch), stop */
         if(*alias != UP_PERCENT)
             break;

         /* parse the specifier */
         count = u_scanf_parse_spec(alias, &spec);

         /* update the pointer in pattern */
         alias += count;

         /* characters below the base wrap to a large value and are rejected */
         handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS);
         if (handlerNum < USCANF_NUM_FMT_HANDLERS) {
             /* skip the argument, if necessary */
             /* query the info function for argument information */
             info = g_u_scanf_infos[ handlerNum ].info;
             if (info != ufmt_count && u_feof(f)) {
                 break;
             }
             else if(spec.fInfo.fSkipArg) {
                 args.ptrValue = NULL;
             }
             else {
                 switch(info) {
                 case ufmt_count:
                     /* set the spec's width to the # of items converted */
                     spec.fInfo.fWidth = cpConsumed;
                     /* fall through to next case */
                 case ufmt_char:
                 case ufmt_uchar:
                 case ufmt_int:
                 case ufmt_string:
                 case ufmt_ustring:
                 case ufmt_pointer:
                 case ufmt_float:
                 case ufmt_double:
                     args.ptrValue = va_arg(ap, void*);
                     break;

                 default:
                     /* else args is ignored */
                     args.ptrValue = NULL;
                     break;
                 }
             }

             /* call the handler function */
             handler = g_u_scanf_infos[ handlerNum ].handler;
             if(handler != 0) {

                 /* reset count to 1 so that += for alias works. */
                 count = 1;

                 /* start from a clean slate: a handler that bails out
                    without writing its conversion count must not cause
                    the previous specifier's count to be added again */
                 argConsumed = 0;

                 consumed = (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed);

                 /* if the handler encountered an error condition, break */
                 if(argConsumed < 0) {
                     converted = -1;
                     break;
                 }

                 /* a negative return is a matching failure: stop here,
                    as scanf does, instead of folding the sentinel into
                    the running count of consumed code units */
                 if(consumed < 0) {
                     break;
                 }

                 cpConsumed += consumed;

                 /* add to the # of items converted */
                 converted += argConsumed;

                 /* update the pointer in pattern */
                 alias += count-1;
             }
             /* else do nothing */
         }
         /* else do nothing */

         /* just ignore unknown tags */
     }

     /* return # of items converted */
     return converted;
 }

 #endif /* #if !UCONFIG_NO_FORMATTING */