| /* |
| * Copyright (C) 2015, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| * file name: affixpatternparser.cpp |
| */ |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_FORMATTING |
| |
| #include "starboard/client_porting/poem/assert_poem.h" |
| #include "starboard/client_porting/poem/string_poem.h" |
| #include "unicode/dcfmtsym.h" |
| #include "unicode/plurrule.h" |
| #include "unicode/ucurr.h" |
| #include "affixpatternparser.h" |
| #include "charstr.h" |
| #include "precision.h" |
| #include "uassert.h" |
| #include "unistrappender.h" |
| |
| static UChar gDefaultSymbols[] = {0xa4, 0xa4, 0xa4}; |
| |
| static UChar gPercent = 0x25; |
| static UChar gPerMill = 0x2030; |
| static UChar gNegative = 0x2D; |
| static UChar gPositive = 0x2B; |
| |
// Token encoding helpers. A token is a single 16-bit code unit: the low
// 7 bits of the high byte carry the token type, bit 0x80 of the high byte is
// the "long" flag, and the low byte carries a length/count.

// Packs token type t and the low 8 bits of l into one UChar. Both arguments
// are parenthesised so that expressions such as `a | b` are masked as a whole.
#define PACK_TOKEN_AND_LENGTH(t, l) ((UChar) (((t) << 8) | ((l) & 0xFF)))

// Extracts the token type, ignoring the long flag.
#define UNPACK_TOKEN(c) ((AffixPattern::ETokenType) (((c) >> 8) & 0x7F))

// Non-zero when the long flag is set in c.
#define UNPACK_LONG(c) (((c) >> 8) & 0x80)

// Extracts the low byte (length/count) of c.
#define UNPACK_LENGTH(c) ((c) & 0xFF)
| |
| U_NAMESPACE_BEGIN |
| |
| static int32_t |
| nextToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) { |
| if (buffer[idx] != 0x27 || idx + 1 == len) { |
| *token = buffer[idx]; |
| return 1; |
| } |
| *token = buffer[idx + 1]; |
| if (buffer[idx + 1] == 0xA4) { |
| int32_t i = 2; |
| for (; idx + i < len && i < 4 && buffer[idx + i] == buffer[idx + 1]; ++i); |
| return i; |
| } |
| return 2; |
| } |
| |
| static int32_t |
| nextUserToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) { |
| *token = buffer[idx]; |
| int32_t max; |
| switch (buffer[idx]) { |
| case 0x27: |
| max = 2; |
| break; |
| case 0xA4: |
| max = 3; |
| break; |
| default: |
| max = 1; |
| break; |
| } |
| int32_t i = 1; |
| for (; idx + i < len && i < max && buffer[idx + i] == buffer[idx]; ++i); |
| return i; |
| } |
| |
| CurrencyAffixInfo::CurrencyAffixInfo() |
| : fSymbol(gDefaultSymbols, 1), |
| fISO(gDefaultSymbols, 2), |
| fLong(DigitAffix(gDefaultSymbols, 3)), |
| fIsDefault(TRUE) { |
| } |
| |
| void |
| CurrencyAffixInfo::set( |
| const char *locale, |
| const PluralRules *rules, |
| const UChar *currency, |
| UErrorCode &status) { |
| if (U_FAILURE(status)) { |
| return; |
| } |
| fIsDefault = FALSE; |
| if (currency == NULL) { |
| fSymbol.setTo(gDefaultSymbols, 1); |
| fISO.setTo(gDefaultSymbols, 2); |
| fLong.remove(); |
| fLong.append(gDefaultSymbols, 3); |
| fIsDefault = TRUE; |
| return; |
| } |
| int32_t len; |
| UBool unusedIsChoice; |
| const UChar *symbol = ucurr_getName( |
| currency, locale, UCURR_SYMBOL_NAME, &unusedIsChoice, |
| &len, &status); |
| if (U_FAILURE(status)) { |
| return; |
| } |
| fSymbol.setTo(symbol, len); |
| fISO.setTo(currency, u_strlen(currency)); |
| fLong.remove(); |
| StringEnumeration* keywords = rules->getKeywords(status); |
| if (U_FAILURE(status)) { |
| return; |
| } |
| const UnicodeString* pluralCount; |
| while ((pluralCount = keywords->snext(status)) != NULL) { |
| CharString pCount; |
| pCount.appendInvariantChars(*pluralCount, status); |
| const UChar *pluralName = ucurr_getPluralName( |
| currency, locale, &unusedIsChoice, pCount.data(), |
| &len, &status); |
| fLong.setVariant(pCount.data(), UnicodeString(pluralName, len), status); |
| } |
| delete keywords; |
| } |
| |
| void |
| CurrencyAffixInfo::adjustPrecision( |
| const UChar *currency, const UCurrencyUsage usage, |
| FixedPrecision &precision, UErrorCode &status) { |
| if (U_FAILURE(status)) { |
| return; |
| } |
| |
| int32_t digitCount = ucurr_getDefaultFractionDigitsForUsage( |
| currency, usage, &status); |
| precision.fMin.setFracDigitCount(digitCount); |
| precision.fMax.setFracDigitCount(digitCount); |
| double increment = ucurr_getRoundingIncrementForUsage( |
| currency, usage, &status); |
| if (increment == 0.0) { |
| precision.fRoundingIncrement.clear(); |
| } else { |
| precision.fRoundingIncrement.set(increment); |
| // guard against round-off error |
| precision.fRoundingIncrement.round(6); |
| } |
| } |
| |
| void |
| AffixPattern::addLiteral( |
| const UChar *literal, int32_t start, int32_t len) { |
| char32Count += u_countChar32(literal + start, len); |
| literals.append(literal, start, len); |
| int32_t tlen = tokens.length(); |
| // Takes 4 UChars to encode maximum literal length. |
| UChar *tokenChars = tokens.getBuffer(tlen + 4); |
| |
| // find start of literal size. May be tlen if there is no literal. |
| // While finding start of literal size, compute literal length |
| int32_t literalLength = 0; |
| int32_t tLiteralStart = tlen; |
| while (tLiteralStart > 0 && UNPACK_TOKEN(tokenChars[tLiteralStart - 1]) == kLiteral) { |
| tLiteralStart--; |
| literalLength <<= 8; |
| literalLength |= UNPACK_LENGTH(tokenChars[tLiteralStart]); |
| } |
| // Add number of chars we just added to literal |
| literalLength += len; |
| |
| // Now encode the new length starting at tLiteralStart |
| tlen = tLiteralStart; |
| tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral, literalLength & 0xFF); |
| literalLength >>= 8; |
| while (literalLength) { |
| tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral | 0x80, literalLength & 0xFF); |
| literalLength >>= 8; |
| } |
| tokens.releaseBuffer(tlen); |
| } |
| |
| void |
| AffixPattern::add(ETokenType t) { |
| add(t, 1); |
| } |
| |
| void |
| AffixPattern::addCurrency(uint8_t count) { |
| add(kCurrency, count); |
| } |
| |
| void |
| AffixPattern::add(ETokenType t, uint8_t count) { |
| U_ASSERT(t != kLiteral); |
| char32Count += count; |
| switch (t) { |
| case kCurrency: |
| hasCurrencyToken = TRUE; |
| break; |
| case kPercent: |
| hasPercentToken = TRUE; |
| break; |
| case kPerMill: |
| hasPermillToken = TRUE; |
| break; |
| default: |
| // Do nothing |
| break; |
| } |
| tokens.append(PACK_TOKEN_AND_LENGTH(t, count)); |
| } |
| |
| AffixPattern & |
| AffixPattern::append(const AffixPattern &other) { |
| AffixPatternIterator iter; |
| other.iterator(iter); |
| UnicodeString literal; |
| while (iter.nextToken()) { |
| switch (iter.getTokenType()) { |
| case kLiteral: |
| iter.getLiteral(literal); |
| addLiteral(literal.getBuffer(), 0, literal.length()); |
| break; |
| case kCurrency: |
| addCurrency(iter.getTokenLength()); |
| break; |
| default: |
| add(iter.getTokenType()); |
| break; |
| } |
| } |
| return *this; |
| } |
| |
| void |
| AffixPattern::remove() { |
| tokens.remove(); |
| literals.remove(); |
| hasCurrencyToken = FALSE; |
| hasPercentToken = FALSE; |
| hasPermillToken = FALSE; |
| char32Count = 0; |
| } |
| |
| // escapes literals for strings where special characters are NOT escaped |
| // except for apostrophe. |
| static void escapeApostropheInLiteral( |
| const UnicodeString &literal, UnicodeStringAppender &appender) { |
| int32_t len = literal.length(); |
| const UChar *buffer = literal.getBuffer(); |
| for (int32_t i = 0; i < len; ++i) { |
| UChar ch = buffer[i]; |
| switch (ch) { |
| case 0x27: |
| appender.append((UChar) 0x27); |
| appender.append((UChar) 0x27); |
| break; |
| default: |
| appender.append(ch); |
| break; |
| } |
| } |
| } |
| |
| |
| // escapes literals for user strings where special characters in literals |
| // are escaped with apostrophe. |
| static void escapeLiteral( |
| const UnicodeString &literal, UnicodeStringAppender &appender) { |
| int32_t len = literal.length(); |
| const UChar *buffer = literal.getBuffer(); |
| for (int32_t i = 0; i < len; ++i) { |
| UChar ch = buffer[i]; |
| switch (ch) { |
| case 0x27: |
| appender.append((UChar) 0x27); |
| appender.append((UChar) 0x27); |
| break; |
| case 0x25: |
| appender.append((UChar) 0x27); |
| appender.append((UChar) 0x25); |
| appender.append((UChar) 0x27); |
| break; |
| case 0x2030: |
| appender.append((UChar) 0x27); |
| appender.append((UChar) 0x2030); |
| appender.append((UChar) 0x27); |
| break; |
| case 0xA4: |
| appender.append((UChar) 0x27); |
| appender.append((UChar) 0xA4); |
| appender.append((UChar) 0x27); |
| break; |
| case 0x2D: |
| appender.append((UChar) 0x27); |
| appender.append((UChar) 0x2D); |
| appender.append((UChar) 0x27); |
| break; |
| case 0x2B: |
| appender.append((UChar) 0x27); |
| appender.append((UChar) 0x2B); |
| appender.append((UChar) 0x27); |
| break; |
| default: |
| appender.append(ch); |
| break; |
| } |
| } |
| } |
| |
| UnicodeString & |
| AffixPattern::toString(UnicodeString &appendTo) const { |
| AffixPatternIterator iter; |
| iterator(iter); |
| UnicodeStringAppender appender(appendTo); |
| UnicodeString literal; |
| while (iter.nextToken()) { |
| switch (iter.getTokenType()) { |
| case kLiteral: |
| escapeApostropheInLiteral(iter.getLiteral(literal), appender); |
| break; |
| case kPercent: |
| appender.append((UChar) 0x27); |
| appender.append((UChar) 0x25); |
| break; |
| case kPerMill: |
| appender.append((UChar) 0x27); |
| appender.append((UChar) 0x2030); |
| break; |
| case kCurrency: |
| { |
| appender.append((UChar) 0x27); |
| int32_t cl = iter.getTokenLength(); |
| for (int32_t i = 0; i < cl; ++i) { |
| appender.append((UChar) 0xA4); |
| } |
| } |
| break; |
| case kNegative: |
| appender.append((UChar) 0x27); |
| appender.append((UChar) 0x2D); |
| break; |
| case kPositive: |
| appender.append((UChar) 0x27); |
| appender.append((UChar) 0x2B); |
| break; |
| default: |
| U_ASSERT(FALSE); |
| break; |
| } |
| } |
| return appendTo; |
| } |
| |
| UnicodeString & |
| AffixPattern::toUserString(UnicodeString &appendTo) const { |
| AffixPatternIterator iter; |
| iterator(iter); |
| UnicodeStringAppender appender(appendTo); |
| UnicodeString literal; |
| while (iter.nextToken()) { |
| switch (iter.getTokenType()) { |
| case kLiteral: |
| escapeLiteral(iter.getLiteral(literal), appender); |
| break; |
| case kPercent: |
| appender.append((UChar) 0x25); |
| break; |
| case kPerMill: |
| appender.append((UChar) 0x2030); |
| break; |
| case kCurrency: |
| { |
| int32_t cl = iter.getTokenLength(); |
| for (int32_t i = 0; i < cl; ++i) { |
| appender.append((UChar) 0xA4); |
| } |
| } |
| break; |
| case kNegative: |
| appender.append((UChar) 0x2D); |
| break; |
| case kPositive: |
| appender.append((UChar) 0x2B); |
| break; |
| default: |
| U_ASSERT(FALSE); |
| break; |
| } |
| } |
| return appendTo; |
| } |
| |
| class AffixPatternAppender : public UMemory { |
| public: |
| AffixPatternAppender(AffixPattern &dest) : fDest(&dest), fIdx(0) { } |
| |
| inline void append(UChar x) { |
| if (fIdx == UPRV_LENGTHOF(fBuffer)) { |
| fDest->addLiteral(fBuffer, 0, fIdx); |
| fIdx = 0; |
| } |
| fBuffer[fIdx++] = x; |
| } |
| |
| inline void append(UChar32 x) { |
| if (fIdx >= UPRV_LENGTHOF(fBuffer) - 1) { |
| fDest->addLiteral(fBuffer, 0, fIdx); |
| fIdx = 0; |
| } |
| U16_APPEND_UNSAFE(fBuffer, fIdx, x); |
| } |
| |
| inline void flush() { |
| if (fIdx) { |
| fDest->addLiteral(fBuffer, 0, fIdx); |
| } |
| fIdx = 0; |
| } |
| |
| /** |
| * flush the buffer when we go out of scope. |
| */ |
| ~AffixPatternAppender() { |
| flush(); |
| } |
| private: |
| AffixPattern *fDest; |
| int32_t fIdx; |
| UChar fBuffer[32]; |
| AffixPatternAppender(const AffixPatternAppender &other); |
| AffixPatternAppender &operator=(const AffixPatternAppender &other); |
| }; |
| |
| |
| AffixPattern & |
| AffixPattern::parseUserAffixString( |
| const UnicodeString &affixStr, |
| AffixPattern &appendTo, |
| UErrorCode &status) { |
| if (U_FAILURE(status)) { |
| return appendTo; |
| } |
| int32_t len = affixStr.length(); |
| const UChar *buffer = affixStr.getBuffer(); |
| // 0 = not quoted; 1 = quoted. |
| int32_t state = 0; |
| AffixPatternAppender appender(appendTo); |
| for (int32_t i = 0; i < len; ) { |
| UChar token; |
| int32_t tokenSize = nextUserToken(buffer, i, len, &token); |
| i += tokenSize; |
| if (token == 0x27 && tokenSize == 1) { // quote |
| state = 1 - state; |
| continue; |
| } |
| if (state == 0) { |
| switch (token) { |
| case 0x25: |
| appender.flush(); |
| appendTo.add(kPercent, 1); |
| break; |
| case 0x27: // double quote |
| appender.append((UChar) 0x27); |
| break; |
| case 0x2030: |
| appender.flush(); |
| appendTo.add(kPerMill, 1); |
| break; |
| case 0x2D: |
| appender.flush(); |
| appendTo.add(kNegative, 1); |
| break; |
| case 0x2B: |
| appender.flush(); |
| appendTo.add(kPositive, 1); |
| break; |
| case 0xA4: |
| appender.flush(); |
| appendTo.add(kCurrency, tokenSize); |
| break; |
| default: |
| appender.append(token); |
| break; |
| } |
| } else { |
| switch (token) { |
| case 0x27: // double quote |
| appender.append((UChar) 0x27); |
| break; |
| case 0xA4: // included b/c tokenSize can be > 1 |
| for (int32_t j = 0; j < tokenSize; ++j) { |
| appender.append((UChar) 0xA4); |
| } |
| break; |
| default: |
| appender.append(token); |
| break; |
| } |
| } |
| } |
| return appendTo; |
| } |
| |
| AffixPattern & |
| AffixPattern::parseAffixString( |
| const UnicodeString &affixStr, |
| AffixPattern &appendTo, |
| UErrorCode &status) { |
| if (U_FAILURE(status)) { |
| return appendTo; |
| } |
| int32_t len = affixStr.length(); |
| const UChar *buffer = affixStr.getBuffer(); |
| for (int32_t i = 0; i < len; ) { |
| UChar token; |
| int32_t tokenSize = nextToken(buffer, i, len, &token); |
| if (tokenSize == 1) { |
| int32_t literalStart = i; |
| ++i; |
| while (i < len && (tokenSize = nextToken(buffer, i, len, &token)) == 1) { |
| ++i; |
| } |
| appendTo.addLiteral(buffer, literalStart, i - literalStart); |
| |
| // If we reached end of string, we are done |
| if (i == len) { |
| return appendTo; |
| } |
| } |
| i += tokenSize; |
| switch (token) { |
| case 0x25: |
| appendTo.add(kPercent, 1); |
| break; |
| case 0x2030: |
| appendTo.add(kPerMill, 1); |
| break; |
| case 0x2D: |
| appendTo.add(kNegative, 1); |
| break; |
| case 0x2B: |
| appendTo.add(kPositive, 1); |
| break; |
| case 0xA4: |
| { |
| if (tokenSize - 1 > 3) { |
| status = U_PARSE_ERROR; |
| return appendTo; |
| } |
| appendTo.add(kCurrency, tokenSize - 1); |
| } |
| break; |
| default: |
| appendTo.addLiteral(&token, 0, 1); |
| break; |
| } |
| } |
| return appendTo; |
| } |
| |
| AffixPatternIterator & |
| AffixPattern::iterator(AffixPatternIterator &result) const { |
| result.nextLiteralIndex = 0; |
| result.lastLiteralLength = 0; |
| result.nextTokenIndex = 0; |
| result.tokens = &tokens; |
| result.literals = &literals; |
| return result; |
| } |
| |
| UBool |
| AffixPatternIterator::nextToken() { |
| int32_t tlen = tokens->length(); |
| if (nextTokenIndex == tlen) { |
| return FALSE; |
| } |
| ++nextTokenIndex; |
| const UChar *tokenBuffer = tokens->getBuffer(); |
| if (UNPACK_TOKEN(tokenBuffer[nextTokenIndex - 1]) == |
| AffixPattern::kLiteral) { |
| while (nextTokenIndex < tlen && |
| UNPACK_LONG(tokenBuffer[nextTokenIndex])) { |
| ++nextTokenIndex; |
| } |
| lastLiteralLength = 0; |
| int32_t i = nextTokenIndex - 1; |
| for (; UNPACK_LONG(tokenBuffer[i]); --i) { |
| lastLiteralLength <<= 8; |
| lastLiteralLength |= UNPACK_LENGTH(tokenBuffer[i]); |
| } |
| lastLiteralLength <<= 8; |
| lastLiteralLength |= UNPACK_LENGTH(tokenBuffer[i]); |
| nextLiteralIndex += lastLiteralLength; |
| } |
| return TRUE; |
| } |
| |
| AffixPattern::ETokenType |
| AffixPatternIterator::getTokenType() const { |
| return UNPACK_TOKEN(tokens->charAt(nextTokenIndex - 1)); |
| } |
| |
| UnicodeString & |
| AffixPatternIterator::getLiteral(UnicodeString &result) const { |
| const UChar *buffer = literals->getBuffer(); |
| result.setTo(buffer + (nextLiteralIndex - lastLiteralLength), lastLiteralLength); |
| return result; |
| } |
| |
| int32_t |
| AffixPatternIterator::getTokenLength() const { |
| const UChar *tokenBuffer = tokens->getBuffer(); |
| AffixPattern::ETokenType type = UNPACK_TOKEN(tokenBuffer[nextTokenIndex - 1]); |
| return type == AffixPattern::kLiteral ? lastLiteralLength : UNPACK_LENGTH(tokenBuffer[nextTokenIndex - 1]); |
| } |
| |
| AffixPatternParser::AffixPatternParser() |
| : fPercent(gPercent), fPermill(gPerMill), fNegative(gNegative), fPositive(gPositive) { |
| } |
| |
| AffixPatternParser::AffixPatternParser( |
| const DecimalFormatSymbols &symbols) { |
| setDecimalFormatSymbols(symbols); |
| } |
| |
| void |
| AffixPatternParser::setDecimalFormatSymbols( |
| const DecimalFormatSymbols &symbols) { |
| fPercent = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol); |
| fPermill = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol); |
| fNegative = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); |
| fPositive = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol); |
| } |
| |
| PluralAffix & |
| AffixPatternParser::parse( |
| const AffixPattern &affixPattern, |
| const CurrencyAffixInfo ¤cyAffixInfo, |
| PluralAffix &appendTo, |
| UErrorCode &status) const { |
| if (U_FAILURE(status)) { |
| return appendTo; |
| } |
| AffixPatternIterator iter; |
| affixPattern.iterator(iter); |
| UnicodeString literal; |
| while (iter.nextToken()) { |
| switch (iter.getTokenType()) { |
| case AffixPattern::kPercent: |
| appendTo.append(fPercent, UNUM_PERCENT_FIELD); |
| break; |
| case AffixPattern::kPerMill: |
| appendTo.append(fPermill, UNUM_PERMILL_FIELD); |
| break; |
| case AffixPattern::kNegative: |
| appendTo.append(fNegative, UNUM_SIGN_FIELD); |
| break; |
| case AffixPattern::kPositive: |
| appendTo.append(fPositive, UNUM_SIGN_FIELD); |
| break; |
| case AffixPattern::kCurrency: |
| switch (iter.getTokenLength()) { |
| case 1: |
| appendTo.append( |
| currencyAffixInfo.getSymbol(), UNUM_CURRENCY_FIELD); |
| break; |
| case 2: |
| appendTo.append( |
| currencyAffixInfo.getISO(), UNUM_CURRENCY_FIELD); |
| break; |
| case 3: |
| appendTo.append( |
| currencyAffixInfo.getLong(), UNUM_CURRENCY_FIELD, status); |
| break; |
| default: |
| U_ASSERT(FALSE); |
| break; |
| } |
| break; |
| case AffixPattern::kLiteral: |
| appendTo.append(iter.getLiteral(literal)); |
| break; |
| default: |
| U_ASSERT(FALSE); |
| break; |
| } |
| } |
| return appendTo; |
| } |
| |
| |
| U_NAMESPACE_END |
| #endif /* #if !UCONFIG_NO_FORMATTING */ |