| // © 2018 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_FORMATTING |
| |
| // Allow implicit conversion from char16_t* to UnicodeString for this file: |
| // Helpful in toString methods and elsewhere. |
| #define UNISTR_FROM_STRING_EXPLICIT |
| |
| #include "numparse_types.h" |
| #include "numparse_scientific.h" |
| #include "static_unicode_sets.h" |
| #include "string_segment.h" |
| |
| using namespace icu; |
| using namespace icu::numparse; |
| using namespace icu::numparse::impl; |
| |
| |
| namespace { |
| |
| inline const UnicodeSet& minusSignSet() { |
| return *unisets::get(unisets::MINUS_SIGN); |
| } |
| |
| inline const UnicodeSet& plusSignSet() { |
| return *unisets::get(unisets::PLUS_SIGN); |
| } |
| |
| } // namespace |
| |
| |
| ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper) |
| : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)), |
| fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED), |
| fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) { |
| |
| const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); |
| if (minusSignSet().contains(minusSign)) { |
| fCustomMinusSign.setToBogus(); |
| } else { |
| fCustomMinusSign = minusSign; |
| } |
| |
| const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol); |
| if (plusSignSet().contains(plusSign)) { |
| fCustomPlusSign.setToBogus(); |
| } else { |
| fCustomPlusSign = plusSign; |
| } |
| } |
| |
| bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { |
| // Only accept scientific notation after the mantissa. |
| if (!result.seenNumber()) { |
| return false; |
| } |
| |
| // Only accept one exponent per string. |
| if (0 != (result.flags & FLAG_HAS_EXPONENT)) { |
| return false; |
| } |
| |
| // First match the scientific separator, and then match another number after it. |
| // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again. |
| int32_t initialOffset = segment.getOffset(); |
| int32_t overlap = segment.getCommonPrefixLength(fExponentSeparatorString); |
| if (overlap == fExponentSeparatorString.length()) { |
| // Full exponent separator match. |
| |
| // First attempt to get a code point, returning true if we can't get one. |
| if (segment.length() == overlap) { |
| return true; |
| } |
| segment.adjustOffset(overlap); |
| |
| // Allow ignorables before the sign. |
| // Note: call site is guarded by the segment.length() check above. |
| // Note: the ignorables matcher should not touch the result. |
| fIgnorablesMatcher.match(segment, result, status); |
| if (segment.length() == 0) { |
| segment.setOffset(initialOffset); |
| return true; |
| } |
| |
| // Allow a sign, and then try to match digits. |
| int8_t exponentSign = 1; |
| if (segment.startsWith(minusSignSet())) { |
| exponentSign = -1; |
| segment.adjustOffsetByCodePoint(); |
| } else if (segment.startsWith(plusSignSet())) { |
| segment.adjustOffsetByCodePoint(); |
| } else if (segment.startsWith(fCustomMinusSign)) { |
| overlap = segment.getCommonPrefixLength(fCustomMinusSign); |
| if (overlap != fCustomMinusSign.length()) { |
| // Partial custom sign match |
| segment.setOffset(initialOffset); |
| return true; |
| } |
| exponentSign = -1; |
| segment.adjustOffset(overlap); |
| } else if (segment.startsWith(fCustomPlusSign)) { |
| overlap = segment.getCommonPrefixLength(fCustomPlusSign); |
| if (overlap != fCustomPlusSign.length()) { |
| // Partial custom sign match |
| segment.setOffset(initialOffset); |
| return true; |
| } |
| segment.adjustOffset(overlap); |
| } |
| |
| // Return true if the segment is empty. |
| if (segment.length() == 0) { |
| segment.setOffset(initialOffset); |
| return true; |
| } |
| |
| // Allow ignorables after the sign. |
| // Note: call site is guarded by the segment.length() check above. |
| // Note: the ignorables matcher should not touch the result. |
| fIgnorablesMatcher.match(segment, result, status); |
| if (segment.length() == 0) { |
| segment.setOffset(initialOffset); |
| return true; |
| } |
| |
| // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available. |
| bool wasBogus = result.quantity.bogus; |
| result.quantity.bogus = false; |
| int digitsOffset = segment.getOffset(); |
| bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status); |
| result.quantity.bogus = wasBogus; |
| |
| if (segment.getOffset() != digitsOffset) { |
| // At least one exponent digit was matched. |
| result.flags |= FLAG_HAS_EXPONENT; |
| } else { |
| // No exponent digits were matched |
| segment.setOffset(initialOffset); |
| } |
| return digitsReturnValue; |
| |
| } else if (overlap == segment.length()) { |
| // Partial exponent separator match |
| return true; |
| } |
| |
| // No match |
| return false; |
| } |
| |
| bool ScientificMatcher::smokeTest(const StringSegment& segment) const { |
| return segment.startsWith(fExponentSeparatorString); |
| } |
| |
| UnicodeString ScientificMatcher::toString() const { |
| return u"<Scientific>"; |
| } |
| |
| |
| #endif /* #if !UCONFIG_NO_FORMATTING */ |