| // © 2017 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_FORMATTING |
| |
| // Allow implicit conversion from char16_t* to UnicodeString for this file: |
| // Helpful in toString methods and elsewhere. |
| #define UNISTR_FROM_STRING_EXPLICIT |
| #define UNISTR_FROM_CHAR_EXPLICIT |
| |
| #include "uassert.h" |
| #include "number_patternstring.h" |
| #include "unicode/utf16.h" |
| #include "number_utils.h" |
| #include "number_roundingutils.h" |
| #include "number_mapper.h" |
| |
| using namespace icu; |
| using namespace icu::number; |
| using namespace icu::number::impl; |
| |
| |
| void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, |
| UErrorCode& status) { |
| patternInfo.consumePattern(patternString, status); |
| } |
| |
| DecimalFormatProperties |
| PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding, |
| UErrorCode& status) { |
| DecimalFormatProperties properties; |
| parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); |
| return properties; |
| } |
| |
| DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern, |
| UErrorCode& status) { |
| return parseToProperties(pattern, IGNORE_ROUNDING_NEVER, status); |
| } |
| |
| void |
| PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties, |
| IgnoreRounding ignoreRounding, UErrorCode& status) { |
| parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); |
| } |
| |
| |
| char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const { |
| const Endpoints& endpoints = getEndpoints(flags); |
| if (index < 0 || index >= endpoints.end - endpoints.start) { |
| UPRV_UNREACHABLE; |
| } |
| return pattern.charAt(endpoints.start + index); |
| } |
| |
| int32_t ParsedPatternInfo::length(int32_t flags) const { |
| return getLengthFromEndpoints(getEndpoints(flags)); |
| } |
| |
| int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) { |
| return endpoints.end - endpoints.start; |
| } |
| |
| UnicodeString ParsedPatternInfo::getString(int32_t flags) const { |
| const Endpoints& endpoints = getEndpoints(flags); |
| if (endpoints.start == endpoints.end) { |
| return UnicodeString(); |
| } |
| // Create a new UnicodeString |
| return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start); |
| } |
| |
| const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const { |
| bool prefix = (flags & AFFIX_PREFIX) != 0; |
| bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0; |
| bool padding = (flags & AFFIX_PADDING) != 0; |
| if (isNegative && padding) { |
| return negative.paddingEndpoints; |
| } else if (padding) { |
| return positive.paddingEndpoints; |
| } else if (prefix && isNegative) { |
| return negative.prefixEndpoints; |
| } else if (prefix) { |
| return positive.prefixEndpoints; |
| } else if (isNegative) { |
| return negative.suffixEndpoints; |
| } else { |
| return positive.suffixEndpoints; |
| } |
| } |
| |
| bool ParsedPatternInfo::positiveHasPlusSign() const { |
| return positive.hasPlusSign; |
| } |
| |
| bool ParsedPatternInfo::hasNegativeSubpattern() const { |
| return fHasNegativeSubpattern; |
| } |
| |
| bool ParsedPatternInfo::negativeHasMinusSign() const { |
| return negative.hasMinusSign; |
| } |
| |
| bool ParsedPatternInfo::hasCurrencySign() const { |
| return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign); |
| } |
| |
| bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const { |
| return AffixUtils::containsType(pattern, type, status); |
| } |
| |
| bool ParsedPatternInfo::hasBody() const { |
| return positive.integerTotal > 0; |
| } |
| |
| ///////////////////////////////////////////////////// |
| /// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION /// |
| ///////////////////////////////////////////////////// |
| |
| UChar32 ParsedPatternInfo::ParserState::peek() { |
| if (offset == pattern.length()) { |
| return -1; |
| } else { |
| return pattern.char32At(offset); |
| } |
| } |
| |
| UChar32 ParsedPatternInfo::ParserState::next() { |
| int codePoint = peek(); |
| offset += U16_LENGTH(codePoint); |
| return codePoint; |
| } |
| |
| void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) { |
| if (U_FAILURE(status)) { return; } |
| this->pattern = patternString; |
| |
| // This class is not intended for writing twice! |
| // Use move assignment to overwrite instead. |
| U_ASSERT(state.offset == 0); |
| |
| // pattern := subpattern (';' subpattern)? |
| currentSubpattern = &positive; |
| consumeSubpattern(status); |
| if (U_FAILURE(status)) { return; } |
| if (state.peek() == u';') { |
| state.next(); // consume the ';' |
| // Don't consume the negative subpattern if it is empty (trailing ';') |
| if (state.peek() != -1) { |
| fHasNegativeSubpattern = true; |
| currentSubpattern = &negative; |
| consumeSubpattern(status); |
| if (U_FAILURE(status)) { return; } |
| } |
| } |
| if (state.peek() != -1) { |
| state.toParseException(u"Found unquoted special character"); |
| status = U_UNQUOTED_SPECIAL; |
| } |
| } |
| |
| void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) { |
| // subpattern := literals? number exponent? literals? |
| consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status); |
| if (U_FAILURE(status)) { return; } |
| consumeAffix(currentSubpattern->prefixEndpoints, status); |
| if (U_FAILURE(status)) { return; } |
| consumePadding(PadPosition::UNUM_PAD_AFTER_PREFIX, status); |
| if (U_FAILURE(status)) { return; } |
| consumeFormat(status); |
| if (U_FAILURE(status)) { return; } |
| consumeExponent(status); |
| if (U_FAILURE(status)) { return; } |
| consumePadding(PadPosition::UNUM_PAD_BEFORE_SUFFIX, status); |
| if (U_FAILURE(status)) { return; } |
| consumeAffix(currentSubpattern->suffixEndpoints, status); |
| if (U_FAILURE(status)) { return; } |
| consumePadding(PadPosition::UNUM_PAD_AFTER_SUFFIX, status); |
| if (U_FAILURE(status)) { return; } |
| } |
| |
| void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) { |
| if (state.peek() != u'*') { |
| return; |
| } |
| if (currentSubpattern->hasPadding) { |
| state.toParseException(u"Cannot have multiple pad specifiers"); |
| status = U_MULTIPLE_PAD_SPECIFIERS; |
| return; |
| } |
| currentSubpattern->paddingLocation = paddingLocation; |
| currentSubpattern->hasPadding = true; |
| state.next(); // consume the '*' |
| currentSubpattern->paddingEndpoints.start = state.offset; |
| consumeLiteral(status); |
| currentSubpattern->paddingEndpoints.end = state.offset; |
| } |
| |
| void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) { |
| // literals := { literal } |
| endpoints.start = state.offset; |
| while (true) { |
| switch (state.peek()) { |
| case u'#': |
| case u'@': |
| case u';': |
| case u'*': |
| case u'.': |
| case u',': |
| case u'0': |
| case u'1': |
| case u'2': |
| case u'3': |
| case u'4': |
| case u'5': |
| case u'6': |
| case u'7': |
| case u'8': |
| case u'9': |
| case -1: |
| // Characters that cannot appear unquoted in a literal |
| // break outer; |
| goto after_outer; |
| |
| case u'%': |
| currentSubpattern->hasPercentSign = true; |
| break; |
| |
| case u'‰': |
| currentSubpattern->hasPerMilleSign = true; |
| break; |
| |
| case u'¤': |
| currentSubpattern->hasCurrencySign = true; |
| break; |
| |
| case u'-': |
| currentSubpattern->hasMinusSign = true; |
| break; |
| |
| case u'+': |
| currentSubpattern->hasPlusSign = true; |
| break; |
| |
| default: |
| break; |
| } |
| consumeLiteral(status); |
| if (U_FAILURE(status)) { return; } |
| } |
| after_outer: |
| endpoints.end = state.offset; |
| } |
| |
| void ParsedPatternInfo::consumeLiteral(UErrorCode& status) { |
| if (state.peek() == -1) { |
| state.toParseException(u"Expected unquoted literal but found EOL"); |
| status = U_PATTERN_SYNTAX_ERROR; |
| return; |
| } else if (state.peek() == u'\'') { |
| state.next(); // consume the starting quote |
| while (state.peek() != u'\'') { |
| if (state.peek() == -1) { |
| state.toParseException(u"Expected quoted literal but found EOL"); |
| status = U_PATTERN_SYNTAX_ERROR; |
| return; |
| } else { |
| state.next(); // consume a quoted character |
| } |
| } |
| state.next(); // consume the ending quote |
| } else { |
| // consume a non-quoted literal character |
| state.next(); |
| } |
| } |
| |
| void ParsedPatternInfo::consumeFormat(UErrorCode& status) { |
| consumeIntegerFormat(status); |
| if (U_FAILURE(status)) { return; } |
| if (state.peek() == u'.') { |
| state.next(); // consume the decimal point |
| currentSubpattern->hasDecimal = true; |
| currentSubpattern->widthExceptAffixes += 1; |
| consumeFractionFormat(status); |
| if (U_FAILURE(status)) { return; } |
| } |
| } |
| |
| void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) { |
| // Convenience reference: |
| ParsedSubpatternInfo& result = *currentSubpattern; |
| |
| while (true) { |
| switch (state.peek()) { |
| case u',': |
| result.widthExceptAffixes += 1; |
| result.groupingSizes <<= 16; |
| break; |
| |
| case u'#': |
| if (result.integerNumerals > 0) { |
| state.toParseException(u"# cannot follow 0 before decimal point"); |
| status = U_UNEXPECTED_TOKEN; |
| return; |
| } |
| result.widthExceptAffixes += 1; |
| result.groupingSizes += 1; |
| if (result.integerAtSigns > 0) { |
| result.integerTrailingHashSigns += 1; |
| } else { |
| result.integerLeadingHashSigns += 1; |
| } |
| result.integerTotal += 1; |
| break; |
| |
| case u'@': |
| if (result.integerNumerals > 0) { |
| state.toParseException(u"Cannot mix 0 and @"); |
| status = U_UNEXPECTED_TOKEN; |
| return; |
| } |
| if (result.integerTrailingHashSigns > 0) { |
| state.toParseException(u"Cannot nest # inside of a run of @"); |
| status = U_UNEXPECTED_TOKEN; |
| return; |
| } |
| result.widthExceptAffixes += 1; |
| result.groupingSizes += 1; |
| result.integerAtSigns += 1; |
| result.integerTotal += 1; |
| break; |
| |
| case u'0': |
| case u'1': |
| case u'2': |
| case u'3': |
| case u'4': |
| case u'5': |
| case u'6': |
| case u'7': |
| case u'8': |
| case u'9': |
| if (result.integerAtSigns > 0) { |
| state.toParseException(u"Cannot mix @ and 0"); |
| status = U_UNEXPECTED_TOKEN; |
| return; |
| } |
| result.widthExceptAffixes += 1; |
| result.groupingSizes += 1; |
| result.integerNumerals += 1; |
| result.integerTotal += 1; |
| if (!result.rounding.isZeroish() || state.peek() != u'0') { |
| result.rounding.appendDigit(static_cast<int8_t>(state.peek() - u'0'), 0, true); |
| } |
| break; |
| |
| default: |
| goto after_outer; |
| } |
| state.next(); // consume the symbol |
| } |
| |
| after_outer: |
| // Disallow patterns with a trailing ',' or with two ',' next to each other |
| auto grouping1 = static_cast<int16_t> (result.groupingSizes & 0xffff); |
| auto grouping2 = static_cast<int16_t> ((result.groupingSizes >> 16) & 0xffff); |
| auto grouping3 = static_cast<int16_t> ((result.groupingSizes >> 32) & 0xffff); |
| if (grouping1 == 0 && grouping2 != -1) { |
| state.toParseException(u"Trailing grouping separator is invalid"); |
| status = U_UNEXPECTED_TOKEN; |
| return; |
| } |
| if (grouping2 == 0 && grouping3 != -1) { |
| state.toParseException(u"Grouping width of zero is invalid"); |
| status = U_PATTERN_SYNTAX_ERROR; |
| return; |
| } |
| } |
| |
| void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) { |
| // Convenience reference: |
| ParsedSubpatternInfo& result = *currentSubpattern; |
| |
| int32_t zeroCounter = 0; |
| while (true) { |
| switch (state.peek()) { |
| case u'#': |
| result.widthExceptAffixes += 1; |
| result.fractionHashSigns += 1; |
| result.fractionTotal += 1; |
| zeroCounter++; |
| break; |
| |
| case u'0': |
| case u'1': |
| case u'2': |
| case u'3': |
| case u'4': |
| case u'5': |
| case u'6': |
| case u'7': |
| case u'8': |
| case u'9': |
| if (result.fractionHashSigns > 0) { |
| state.toParseException(u"0 cannot follow # after decimal point"); |
| status = U_UNEXPECTED_TOKEN; |
| return; |
| } |
| result.widthExceptAffixes += 1; |
| result.fractionNumerals += 1; |
| result.fractionTotal += 1; |
| if (state.peek() == u'0') { |
| zeroCounter++; |
| } else { |
| result.rounding |
| .appendDigit(static_cast<int8_t>(state.peek() - u'0'), zeroCounter, false); |
| zeroCounter = 0; |
| } |
| break; |
| |
| default: |
| return; |
| } |
| state.next(); // consume the symbol |
| } |
| } |
| |
| void ParsedPatternInfo::consumeExponent(UErrorCode& status) { |
| // Convenience reference: |
| ParsedSubpatternInfo& result = *currentSubpattern; |
| |
| if (state.peek() != u'E') { |
| return; |
| } |
| if ((result.groupingSizes & 0xffff0000L) != 0xffff0000L) { |
| state.toParseException(u"Cannot have grouping separator in scientific notation"); |
| status = U_MALFORMED_EXPONENTIAL_PATTERN; |
| return; |
| } |
| state.next(); // consume the E |
| result.widthExceptAffixes++; |
| if (state.peek() == u'+') { |
| state.next(); // consume the + |
| result.exponentHasPlusSign = true; |
| result.widthExceptAffixes++; |
| } |
| while (state.peek() == u'0') { |
| state.next(); // consume the 0 |
| result.exponentZeros += 1; |
| result.widthExceptAffixes++; |
| } |
| } |
| |
| /////////////////////////////////////////////////// |
| /// END RECURSIVE DESCENT PARSER IMPLEMENTATION /// |
| /////////////////////////////////////////////////// |
| |
| void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, |
| DecimalFormatProperties& properties, |
| IgnoreRounding ignoreRounding, UErrorCode& status) { |
| if (pattern.length() == 0) { |
| // Backwards compatibility requires that we reset to the default values. |
| // TODO: Only overwrite the properties that "saveToProperties" normally touches? |
| properties.clear(); |
| return; |
| } |
| |
| ParsedPatternInfo patternInfo; |
| parseToPatternInfo(pattern, patternInfo, status); |
| if (U_FAILURE(status)) { return; } |
| patternInfoToProperties(properties, patternInfo, ignoreRounding, status); |
| } |
| |
| void |
| PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo, |
| IgnoreRounding _ignoreRounding, UErrorCode& status) { |
| // Translate from PatternParseResult to Properties. |
| // Note that most data from "negative" is ignored per the specification of DecimalFormat. |
| |
| const ParsedSubpatternInfo& positive = patternInfo.positive; |
| |
| bool ignoreRounding; |
| if (_ignoreRounding == IGNORE_ROUNDING_NEVER) { |
| ignoreRounding = false; |
| } else if (_ignoreRounding == IGNORE_ROUNDING_IF_CURRENCY) { |
| ignoreRounding = positive.hasCurrencySign; |
| } else { |
| U_ASSERT(_ignoreRounding == IGNORE_ROUNDING_ALWAYS); |
| ignoreRounding = true; |
| } |
| |
| // Grouping settings |
| auto grouping1 = static_cast<int16_t> (positive.groupingSizes & 0xffff); |
| auto grouping2 = static_cast<int16_t> ((positive.groupingSizes >> 16) & 0xffff); |
| auto grouping3 = static_cast<int16_t> ((positive.groupingSizes >> 32) & 0xffff); |
| if (grouping2 != -1) { |
| properties.groupingSize = grouping1; |
| properties.groupingUsed = true; |
| } else { |
| properties.groupingSize = -1; |
| properties.groupingUsed = false; |
| } |
| if (grouping3 != -1) { |
| properties.secondaryGroupingSize = grouping2; |
| } else { |
| properties.secondaryGroupingSize = -1; |
| } |
| |
| // For backwards compatibility, require that the pattern emit at least one min digit. |
| int minInt, minFrac; |
| if (positive.integerTotal == 0 && positive.fractionTotal > 0) { |
| // patterns like ".##" |
| minInt = 0; |
| minFrac = uprv_max(1, positive.fractionNumerals); |
| } else if (positive.integerNumerals == 0 && positive.fractionNumerals == 0) { |
| // patterns like "#.##" |
| minInt = 1; |
| minFrac = 0; |
| } else { |
| minInt = positive.integerNumerals; |
| minFrac = positive.fractionNumerals; |
| } |
| |
| // Rounding settings |
| // Don't set basic rounding when there is a currency sign; defer to CurrencyUsage |
| if (positive.integerAtSigns > 0) { |
| properties.minimumFractionDigits = -1; |
| properties.maximumFractionDigits = -1; |
| properties.roundingIncrement = 0.0; |
| properties.minimumSignificantDigits = positive.integerAtSigns; |
| properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns; |
| } else if (!positive.rounding.isZeroish()) { |
| if (!ignoreRounding) { |
| properties.minimumFractionDigits = minFrac; |
| properties.maximumFractionDigits = positive.fractionTotal; |
| properties.roundingIncrement = positive.rounding.toDouble(); |
| } else { |
| properties.minimumFractionDigits = -1; |
| properties.maximumFractionDigits = -1; |
| properties.roundingIncrement = 0.0; |
| } |
| properties.minimumSignificantDigits = -1; |
| properties.maximumSignificantDigits = -1; |
| } else { |
| if (!ignoreRounding) { |
| properties.minimumFractionDigits = minFrac; |
| properties.maximumFractionDigits = positive.fractionTotal; |
| properties.roundingIncrement = 0.0; |
| } else { |
| properties.minimumFractionDigits = -1; |
| properties.maximumFractionDigits = -1; |
| properties.roundingIncrement = 0.0; |
| } |
| properties.minimumSignificantDigits = -1; |
| properties.maximumSignificantDigits = -1; |
| } |
| |
| // If the pattern ends with a '.' then force the decimal point. |
| if (positive.hasDecimal && positive.fractionTotal == 0) { |
| properties.decimalSeparatorAlwaysShown = true; |
| } else { |
| properties.decimalSeparatorAlwaysShown = false; |
| } |
| |
| // Scientific notation settings |
| if (positive.exponentZeros > 0) { |
| properties.exponentSignAlwaysShown = positive.exponentHasPlusSign; |
| properties.minimumExponentDigits = positive.exponentZeros; |
| if (positive.integerAtSigns == 0) { |
| // patterns without '@' can define max integer digits, used for engineering notation |
| properties.minimumIntegerDigits = positive.integerNumerals; |
| properties.maximumIntegerDigits = positive.integerTotal; |
| } else { |
| // patterns with '@' cannot define max integer digits |
| properties.minimumIntegerDigits = 1; |
| properties.maximumIntegerDigits = -1; |
| } |
| } else { |
| properties.exponentSignAlwaysShown = false; |
| properties.minimumExponentDigits = -1; |
| properties.minimumIntegerDigits = minInt; |
| properties.maximumIntegerDigits = -1; |
| } |
| |
| // Compute the affix patterns (required for both padding and affixes) |
| UnicodeString posPrefix = patternInfo.getString(AffixPatternProvider::AFFIX_PREFIX); |
| UnicodeString posSuffix = patternInfo.getString(0); |
| |
| // Padding settings |
| if (positive.hasPadding) { |
| // The width of the positive prefix and suffix templates are included in the padding |
| int paddingWidth = positive.widthExceptAffixes + |
| AffixUtils::estimateLength(posPrefix, status) + |
| AffixUtils::estimateLength(posSuffix, status); |
| properties.formatWidth = paddingWidth; |
| UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING); |
| if (rawPaddingString.length() == 1) { |
| properties.padString = rawPaddingString; |
| } else if (rawPaddingString.length() == 2) { |
| if (rawPaddingString.charAt(0) == u'\'') { |
| properties.padString.setTo(u"'", -1); |
| } else { |
| properties.padString = rawPaddingString; |
| } |
| } else { |
| properties.padString = UnicodeString(rawPaddingString, 1, rawPaddingString.length() - 2); |
| } |
| properties.padPosition = positive.paddingLocation; |
| } else { |
| properties.formatWidth = -1; |
| properties.padString.setToBogus(); |
| properties.padPosition.nullify(); |
| } |
| |
| // Set the affixes |
| // Always call the setter, even if the prefixes are empty, especially in the case of the |
| // negative prefix pattern, to prevent default values from overriding the pattern. |
| properties.positivePrefixPattern = posPrefix; |
| properties.positiveSuffixPattern = posSuffix; |
| if (patternInfo.fHasNegativeSubpattern) { |
| properties.negativePrefixPattern = patternInfo.getString( |
| AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN | AffixPatternProvider::AFFIX_PREFIX); |
| properties.negativeSuffixPattern = patternInfo.getString( |
| AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN); |
| } else { |
| properties.negativePrefixPattern.setToBogus(); |
| properties.negativeSuffixPattern.setToBogus(); |
| } |
| |
| // Set the magnitude multiplier |
| if (positive.hasPercentSign) { |
| properties.magnitudeMultiplier = 2; |
| } else if (positive.hasPerMilleSign) { |
| properties.magnitudeMultiplier = 3; |
| } else { |
| properties.magnitudeMultiplier = 0; |
| } |
| } |
| |
| /////////////////////////////////////////////////////////////////// |
| /// End PatternStringParser.java; begin PatternStringUtils.java /// |
| /////////////////////////////////////////////////////////////////// |
| |
| // Determine whether a given roundingIncrement should be ignored for formatting |
| // based on the current maxFrac value (maximum fraction digits). For example a |
| // roundingIncrement of 0.01 should be ignored if maxFrac is 1, but not if maxFrac |
| // is 2 or more. Note that roundingIncrements are rounded in significance, so |
| // a roundingIncrement of 0.006 is treated like 0.01 for this determination, i.e. |
| // it should not be ignored if maxFrac is 2 or more (but a roundingIncrement of |
| // 0.005 is treated like 0.001 for significance). This is the reason for the |
| // initial doubling below. |
| // roundIncr must be non-zero. |
| bool PatternStringUtils::ignoreRoundingIncrement(double roundIncr, int32_t maxFrac) { |
| if (maxFrac < 0) { |
| return false; |
| } |
| int32_t frac = 0; |
| roundIncr *= 2.0; |
| for (frac = 0; frac <= maxFrac && roundIncr <= 1.0; frac++, roundIncr *= 10.0); |
| return (frac > maxFrac); |
| } |
| |
| UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties, |
| UErrorCode& status) { |
| UnicodeString sb; |
| |
| // Convenience references |
| // The uprv_min() calls prevent DoS |
| int32_t dosMax = 100; |
| int32_t grouping1 = uprv_max(0, uprv_min(properties.groupingSize, dosMax)); |
| int32_t grouping2 = uprv_max(0, uprv_min(properties.secondaryGroupingSize, dosMax)); |
| bool useGrouping = properties.groupingUsed; |
| int32_t paddingWidth = uprv_min(properties.formatWidth, dosMax); |
| NullableValue<PadPosition> paddingLocation = properties.padPosition; |
| UnicodeString paddingString = properties.padString; |
| int32_t minInt = uprv_max(0, uprv_min(properties.minimumIntegerDigits, dosMax)); |
| int32_t maxInt = uprv_min(properties.maximumIntegerDigits, dosMax); |
| int32_t minFrac = uprv_max(0, uprv_min(properties.minimumFractionDigits, dosMax)); |
| int32_t maxFrac = uprv_min(properties.maximumFractionDigits, dosMax); |
| int32_t minSig = uprv_min(properties.minimumSignificantDigits, dosMax); |
| int32_t maxSig = uprv_min(properties.maximumSignificantDigits, dosMax); |
| bool alwaysShowDecimal = properties.decimalSeparatorAlwaysShown; |
| int32_t exponentDigits = uprv_min(properties.minimumExponentDigits, dosMax); |
| bool exponentShowPlusSign = properties.exponentSignAlwaysShown; |
| |
| AutoAffixPatternProvider affixProvider(properties, status); |
| |
| // Prefixes |
| sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_PREFIX)); |
| int32_t afterPrefixPos = sb.length(); |
| |
| // Figure out the grouping sizes. |
| if (!useGrouping) { |
| grouping1 = 0; |
| grouping2 = 0; |
| } else if (grouping1 == grouping2) { |
| grouping1 = 0; |
| } |
| int32_t groupingLength = grouping1 + grouping2 + 1; |
| |
| // Figure out the digits we need to put in the pattern. |
| double roundingInterval = properties.roundingIncrement; |
| UnicodeString digitsString; |
| int32_t digitsStringScale = 0; |
| if (maxSig != uprv_min(dosMax, -1)) { |
| // Significant Digits. |
| while (digitsString.length() < minSig) { |
| digitsString.append(u'@'); |
| } |
| while (digitsString.length() < maxSig) { |
| digitsString.append(u'#'); |
| } |
| } else if (roundingInterval != 0.0 && !ignoreRoundingIncrement(roundingInterval,maxFrac)) { |
| // Rounding Interval. |
| digitsStringScale = -roundingutils::doubleFractionLength(roundingInterval, nullptr); |
| // TODO: Check for DoS here? |
| DecimalQuantity incrementQuantity; |
| incrementQuantity.setToDouble(roundingInterval); |
| incrementQuantity.adjustMagnitude(-digitsStringScale); |
| incrementQuantity.roundToMagnitude(0, kDefaultMode, status); |
| UnicodeString str = incrementQuantity.toPlainString(); |
| if (str.charAt(0) == u'-') { |
| // TODO: Unsupported operation exception or fail silently? |
| digitsString.append(str, 1, str.length() - 1); |
| } else { |
| digitsString.append(str); |
| } |
| } |
| while (digitsString.length() + digitsStringScale < minInt) { |
| digitsString.insert(0, u'0'); |
| } |
| while (-digitsStringScale < minFrac) { |
| digitsString.append(u'0'); |
| digitsStringScale--; |
| } |
| |
| // Write the digits to the string builder |
| int32_t m0 = uprv_max(groupingLength, digitsString.length() + digitsStringScale); |
| m0 = (maxInt != dosMax) ? uprv_max(maxInt, m0) - 1 : m0 - 1; |
| int32_t mN = (maxFrac != dosMax) ? uprv_min(-maxFrac, digitsStringScale) : digitsStringScale; |
| for (int32_t magnitude = m0; magnitude >= mN; magnitude--) { |
| int32_t di = digitsString.length() + digitsStringScale - magnitude - 1; |
| if (di < 0 || di >= digitsString.length()) { |
| sb.append(u'#'); |
| } else { |
| sb.append(digitsString.charAt(di)); |
| } |
| // Decimal separator |
| if (magnitude == 0 && (alwaysShowDecimal || mN < 0)) { |
| sb.append(u'.'); |
| } |
| if (!useGrouping) { |
| continue; |
| } |
| // Least-significant grouping separator |
| if (magnitude > 0 && magnitude == grouping1) { |
| sb.append(u','); |
| } |
| // All other grouping separators |
| if (magnitude > grouping1 && grouping2 > 0 && (magnitude - grouping1) % grouping2 == 0) { |
| sb.append(u','); |
| } |
| } |
| |
| // Exponential notation |
| if (exponentDigits != uprv_min(dosMax, -1)) { |
| sb.append(u'E'); |
| if (exponentShowPlusSign) { |
| sb.append(u'+'); |
| } |
| for (int32_t i = 0; i < exponentDigits; i++) { |
| sb.append(u'0'); |
| } |
| } |
| |
| // Suffixes |
| int32_t beforeSuffixPos = sb.length(); |
| sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_SUFFIX)); |
| |
| // Resolve Padding |
| if (paddingWidth > 0 && !paddingLocation.isNull()) { |
| while (paddingWidth - sb.length() > 0) { |
| sb.insert(afterPrefixPos, u'#'); |
| beforeSuffixPos++; |
| } |
| int32_t addedLength; |
| switch (paddingLocation.get(status)) { |
| case PadPosition::UNUM_PAD_BEFORE_PREFIX: |
| addedLength = escapePaddingString(paddingString, sb, 0, status); |
| sb.insert(0, u'*'); |
| afterPrefixPos += addedLength + 1; |
| beforeSuffixPos += addedLength + 1; |
| break; |
| case PadPosition::UNUM_PAD_AFTER_PREFIX: |
| addedLength = escapePaddingString(paddingString, sb, afterPrefixPos, status); |
| sb.insert(afterPrefixPos, u'*'); |
| afterPrefixPos += addedLength + 1; |
| beforeSuffixPos += addedLength + 1; |
| break; |
| case PadPosition::UNUM_PAD_BEFORE_SUFFIX: |
| escapePaddingString(paddingString, sb, beforeSuffixPos, status); |
| sb.insert(beforeSuffixPos, u'*'); |
| break; |
| case PadPosition::UNUM_PAD_AFTER_SUFFIX: |
| sb.append(u'*'); |
| escapePaddingString(paddingString, sb, sb.length(), status); |
| break; |
| } |
| if (U_FAILURE(status)) { return sb; } |
| } |
| |
| // Negative affixes |
| // Ignore if the negative prefix pattern is "-" and the negative suffix is empty |
| if (affixProvider.get().hasNegativeSubpattern()) { |
| sb.append(u';'); |
| sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_PREFIX)); |
| // Copy the positive digit format into the negative. |
| // This is optional; the pattern is the same as if '#' were appended here instead. |
| // NOTE: It is not safe to append the UnicodeString to itself, so we need to copy. |
| // See http://bugs.icu-project.org/trac/ticket/13707 |
| UnicodeString copy(sb); |
| sb.append(copy, afterPrefixPos, beforeSuffixPos - afterPrefixPos); |
| sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_SUFFIX)); |
| } |
| |
| return sb; |
| } |
| |
| int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex, |
| UErrorCode& status) { |
| (void) status; |
| if (input.length() == 0) { |
| input.setTo(kFallbackPaddingString, -1); |
| } |
| int startLength = output.length(); |
| if (input.length() == 1) { |
| if (input.compare(u"'", -1) == 0) { |
| output.insert(startIndex, u"''", -1); |
| } else { |
| output.insert(startIndex, input); |
| } |
| } else { |
| output.insert(startIndex, u'\''); |
| int offset = 1; |
| for (int i = 0; i < input.length(); i++) { |
| // it's okay to deal in chars here because the quote mark is the only interesting thing. |
| char16_t ch = input.charAt(i); |
| if (ch == u'\'') { |
| output.insert(startIndex + offset, u"''", -1); |
| offset += 2; |
| } else { |
| output.insert(startIndex + offset, ch); |
| offset += 1; |
| } |
| } |
| output.insert(startIndex + offset, u'\''); |
| } |
| return output.length() - startLength; |
| } |
| |
| UnicodeString |
| PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols, |
| bool toLocalized, UErrorCode& status) { |
| // Construct a table of strings to be converted between localized and standard. |
| static constexpr int32_t LEN = 21; |
| UnicodeString table[LEN][2]; |
| int standIdx = toLocalized ? 0 : 1; |
| int localIdx = toLocalized ? 1 : 0; |
| table[0][standIdx] = u"%"; |
| table[0][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol); |
| table[1][standIdx] = u"‰"; |
| table[1][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol); |
| table[2][standIdx] = u"."; |
| table[2][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol); |
| table[3][standIdx] = u","; |
| table[3][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol); |
| table[4][standIdx] = u"-"; |
| table[4][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); |
| table[5][standIdx] = u"+"; |
| table[5][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol); |
| table[6][standIdx] = u";"; |
| table[6][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPatternSeparatorSymbol); |
| table[7][standIdx] = u"@"; |
| table[7][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kSignificantDigitSymbol); |
| table[8][standIdx] = u"E"; |
| table[8][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol); |
| table[9][standIdx] = u"*"; |
| table[9][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPadEscapeSymbol); |
| table[10][standIdx] = u"#"; |
| table[10][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDigitSymbol); |
| for (int i = 0; i < 10; i++) { |
| table[11 + i][standIdx] = u'0' + i; |
| table[11 + i][localIdx] = symbols.getConstDigitSymbol(i); |
| } |
| |
| // Special case: quotes are NOT allowed to be in any localIdx strings. |
| // Substitute them with '’' instead. |
| for (int32_t i = 0; i < LEN; i++) { |
| table[i][localIdx].findAndReplace(u'\'', u'’'); |
| } |
| |
| // Iterate through the string and convert. |
| // State table: |
| // 0 => base state |
| // 1 => first char inside a quoted sequence in input and output string |
| // 2 => inside a quoted sequence in input and output string |
| // 3 => first char after a close quote in input string; |
| // close quote still needs to be written to output string |
| // 4 => base state in input string; inside quoted sequence in output string |
| // 5 => first char inside a quoted sequence in input string; |
| // inside quoted sequence in output string |
| UnicodeString result; |
| int state = 0; |
| for (int offset = 0; offset < input.length(); offset++) { |
| UChar ch = input.charAt(offset); |
| |
| // Handle a quote character (state shift) |
| if (ch == u'\'') { |
| if (state == 0) { |
| result.append(u'\''); |
| state = 1; |
| continue; |
| } else if (state == 1) { |
| result.append(u'\''); |
| state = 0; |
| continue; |
| } else if (state == 2) { |
| state = 3; |
| continue; |
| } else if (state == 3) { |
| result.append(u'\''); |
| result.append(u'\''); |
| state = 1; |
| continue; |
| } else if (state == 4) { |
| state = 5; |
| continue; |
| } else { |
| U_ASSERT(state == 5); |
| result.append(u'\''); |
| result.append(u'\''); |
| state = 4; |
| continue; |
| } |
| } |
| |
| if (state == 0 || state == 3 || state == 4) { |
| for (auto& pair : table) { |
| // Perform a greedy match on this symbol string |
| UnicodeString temp = input.tempSubString(offset, pair[0].length()); |
| if (temp == pair[0]) { |
| // Skip ahead past this region for the next iteration |
| offset += pair[0].length() - 1; |
| if (state == 3 || state == 4) { |
| result.append(u'\''); |
| state = 0; |
| } |
| result.append(pair[1]); |
| goto continue_outer; |
| } |
| } |
| // No replacement found. Check if a special quote is necessary |
| for (auto& pair : table) { |
| UnicodeString temp = input.tempSubString(offset, pair[1].length()); |
| if (temp == pair[1]) { |
| if (state == 0) { |
| result.append(u'\''); |
| state = 4; |
| } |
| result.append(ch); |
| goto continue_outer; |
| } |
| } |
| // Still nothing. Copy the char verbatim. (Add a close quote if necessary) |
| if (state == 3 || state == 4) { |
| result.append(u'\''); |
| state = 0; |
| } |
| result.append(ch); |
| } else { |
| U_ASSERT(state == 1 || state == 2 || state == 5); |
| result.append(ch); |
| state = 2; |
| } |
| continue_outer:; |
| } |
| // Resolve final quotes |
| if (state == 3 || state == 4) { |
| result.append(u'\''); |
| state = 0; |
| } |
| if (state != 0) { |
| // Malformed localized pattern: unterminated quote |
| status = U_PATTERN_SYNTAX_ERROR; |
| } |
| return result; |
| } |
| |
| void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix, |
| PatternSignType patternSignType, |
| StandardPlural::Form plural, |
| bool perMilleReplacesPercent, UnicodeString& output) { |
| |
| // Should the output render '+' where '-' would normally appear in the pattern? |
| bool plusReplacesMinusSign = (patternSignType == PATTERN_SIGN_TYPE_POS_SIGN) |
| && !patternInfo.positiveHasPlusSign(); |
| |
| // Should we use the affix from the negative subpattern? |
| // (If not, we will use the positive subpattern.) |
| bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern() |
| && (patternSignType == PATTERN_SIGN_TYPE_NEG |
| || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign)); |
| |
| // Resolve the flags for the affix pattern. |
| int flags = 0; |
| if (useNegativeAffixPattern) { |
| flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN; |
| } |
| if (isPrefix) { |
| flags |= AffixPatternProvider::AFFIX_PREFIX; |
| } |
| if (plural != StandardPlural::Form::COUNT) { |
| U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural)); |
| flags |= plural; |
| } |
| |
| // Should we prepend a sign to the pattern? |
| bool prependSign; |
| if (!isPrefix || useNegativeAffixPattern) { |
| prependSign = false; |
| } else if (patternSignType == PATTERN_SIGN_TYPE_NEG) { |
| prependSign = true; |
| } else { |
| prependSign = plusReplacesMinusSign; |
| } |
| |
| // Compute the length of the affix pattern. |
| int length = patternInfo.length(flags) + (prependSign ? 1 : 0); |
| |
| // Finally, set the result into the StringBuilder. |
| output.remove(); |
| for (int index = 0; index < length; index++) { |
| char16_t candidate; |
| if (prependSign && index == 0) { |
| candidate = u'-'; |
| } else if (prependSign) { |
| candidate = patternInfo.charAt(flags, index - 1); |
| } else { |
| candidate = patternInfo.charAt(flags, index); |
| } |
| if (plusReplacesMinusSign && candidate == u'-') { |
| candidate = u'+'; |
| } |
| if (perMilleReplacesPercent && candidate == u'%') { |
| candidate = u'‰'; |
| } |
| output.append(candidate); |
| } |
| } |
| |
| PatternSignType PatternStringUtils::resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum) { |
| switch (signDisplay) { |
| case UNUM_SIGN_AUTO: |
| case UNUM_SIGN_ACCOUNTING: |
| switch (signum) { |
| case SIGNUM_NEG: |
| case SIGNUM_NEG_ZERO: |
| return PATTERN_SIGN_TYPE_NEG; |
| case SIGNUM_POS_ZERO: |
| case SIGNUM_POS: |
| return PATTERN_SIGN_TYPE_POS; |
| default: |
| break; |
| } |
| break; |
| |
| case UNUM_SIGN_ALWAYS: |
| case UNUM_SIGN_ACCOUNTING_ALWAYS: |
| switch (signum) { |
| case SIGNUM_NEG: |
| case SIGNUM_NEG_ZERO: |
| return PATTERN_SIGN_TYPE_NEG; |
| case SIGNUM_POS_ZERO: |
| case SIGNUM_POS: |
| return PATTERN_SIGN_TYPE_POS_SIGN; |
| default: |
| break; |
| } |
| break; |
| |
| case UNUM_SIGN_EXCEPT_ZERO: |
| case UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO: |
| switch (signum) { |
| case SIGNUM_NEG: |
| return PATTERN_SIGN_TYPE_NEG; |
| case SIGNUM_NEG_ZERO: |
| case SIGNUM_POS_ZERO: |
| return PATTERN_SIGN_TYPE_POS; |
| case SIGNUM_POS: |
| return PATTERN_SIGN_TYPE_POS_SIGN; |
| default: |
| break; |
| } |
| break; |
| |
| case UNUM_SIGN_NEVER: |
| return PATTERN_SIGN_TYPE_POS; |
| |
| default: |
| break; |
| } |
| |
| UPRV_UNREACHABLE; |
| return PATTERN_SIGN_TYPE_POS; |
| } |
| |
| #endif /* #if !UCONFIG_NO_FORMATTING */ |