// blob: 4b88cd998fee09a8d10680144db204bc5effe46f [file] [log] [blame]
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT
#include "numparse_types.h"
#include "numparse_scientific.h"
#include "static_unicode_sets.h"
#include "string_segment.h"
using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;
namespace {

/** Returns, by reference, the set that unisets::get() yields for the MINUS_SIGN key. */
inline const UnicodeSet& minusSignSet() {
    const UnicodeSet& minusSigns = *unisets::get(unisets::MINUS_SIGN);
    return minusSigns;
}

/** Returns, by reference, the set that unisets::get() yields for the PLUS_SIGN key. */
inline const UnicodeSet& plusSignSet() {
    const UnicodeSet& plusSigns = *unisets::get(unisets::PLUS_SIGN);
    return plusSigns;
}

} // namespace
/**
 * Builds a matcher for the exponent part of a number.
 *
 * The exponent separator is taken from the symbols' kExponentialSymbol. The nested exponent
 * matcher is constructed with the integer-only and grouping-disabled parse flags, and the
 * ignorables matcher with the strict-ignorables flag.
 *
 * The symbols' minus and plus signs are remembered as "custom" signs only when they are not
 * already contained in the corresponding sign set; otherwise the custom sign is set to bogus.
 *
 * @param dfs     Symbols supplying the exponent separator and the minus/plus signs.
 * @param grouper Forwarded unchanged to the exponent matcher.
 */
ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
        : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
          fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
          fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {
    // Minus sign: keep a private copy only if the sign set does not already contain it.
    const UnicodeString& symbolMinus = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
    if (!minusSignSet().contains(symbolMinus)) {
        fCustomMinusSign = symbolMinus;
    } else {
        fCustomMinusSign.setToBogus();
    }

    // Plus sign: same treatment as the minus sign.
    const UnicodeString& symbolPlus = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
    if (!plusSignSet().contains(symbolPlus)) {
        fCustomPlusSign = symbolPlus;
    } else {
        fCustomPlusSign.setToBogus();
    }
}
/**
 * Tries to consume an exponent -- separator, optional sign, then digits -- from the front of
 * the segment.
 *
 * Returns false without touching the segment when no number has been seen yet, when an
 * exponent has already been recorded in result.flags, or when the segment's common prefix
 * with the exponent separator is neither the whole separator nor the whole segment.
 *
 * Returns true with the segment at its original offset when the segment is used up before
 * any exponent digits are reached (including a segment that is only a prefix of the
 * separator), or when a custom sign is only partially matched.
 *
 * Otherwise the return value is whatever the exponent digit matcher returns.
 * FLAG_HAS_EXPONENT is set only if that matcher advanced the segment; if it did not, the
 * segment is rewound to its original offset.
 */
bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    // Scientific notation is only accepted after the mantissa, and only once per string.
    if (!result.seenNumber() || (result.flags & FLAG_HAS_EXPONENT) != 0) {
        return false;
    }

    // Remember the starting position so an incomplete exponent can be un-consumed.
    const int32_t startOffset = segment.getOffset();

    // Step 1: the exponent separator.
    // NOTE: This is guarded by the smoke test; the separator's length is not re-checked here.
    const int32_t separatorOverlap = segment.getCommonPrefixLength(fExponentSeparatorString);
    if (separatorOverlap != fExponentSeparatorString.length()) {
        // Not a full separator match. It is a partial match exactly when the whole segment
        // was covered by the common prefix; anything else is no match at all.
        return separatorOverlap == segment.length();
    }

    // Full separator match. If nothing follows it, report true without moving the offset.
    if (separatorOverlap == segment.length()) {
        return true;
    }
    segment.adjustOffset(separatorOverlap);

    // Step 2: ignorables before the sign.
    // The length check above guarantees the segment is non-empty at this call.
    // The ignorables matcher is not expected to modify the result.
    fIgnorablesMatcher.match(segment, result, status);
    if (segment.length() == 0) {
        segment.setOffset(startOffset);
        return true;
    }

    // Step 3: an optional sign. The sign sets are tried before the custom sign strings.
    int8_t exponentSign = 1;
    if (segment.startsWith(minusSignSet())) {
        segment.adjustOffsetByCodePoint();
        exponentSign = -1;
    } else if (segment.startsWith(plusSignSet())) {
        segment.adjustOffsetByCodePoint();
    } else if (segment.startsWith(fCustomMinusSign)) {
        const int32_t signOverlap = segment.getCommonPrefixLength(fCustomMinusSign);
        if (signOverlap != fCustomMinusSign.length()) {
            // Only part of the custom minus sign is present.
            segment.setOffset(startOffset);
            return true;
        }
        segment.adjustOffset(signOverlap);
        exponentSign = -1;
    } else if (segment.startsWith(fCustomPlusSign)) {
        const int32_t signOverlap = segment.getCommonPrefixLength(fCustomPlusSign);
        if (signOverlap != fCustomPlusSign.length()) {
            // Only part of the custom plus sign is present.
            segment.setOffset(startOffset);
            return true;
        }
        segment.adjustOffset(signOverlap);
    }

    // Nothing left after the sign: rewind and report true.
    if (segment.length() == 0) {
        segment.setOffset(startOffset);
        return true;
    }

    // Step 4: ignorables after the sign.
    // The length check above guarantees the segment is non-empty at this call.
    // The ignorables matcher is not expected to modify the result.
    fIgnorablesMatcher.match(segment, result, status);
    if (segment.length() == 0) {
        segment.setOffset(startOffset);
        return true;
    }

    // Step 5: the exponent digits.
    // E0 must be accepted after NaN, so the quantity's bogus flag is cleared for the duration
    // of the digit match and restored immediately afterwards.
    const bool savedBogus = result.quantity.bogus;
    result.quantity.bogus = false;
    const int32_t offsetBeforeDigits = segment.getOffset();
    const bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
    result.quantity.bogus = savedBogus;

    if (segment.getOffset() == offsetBeforeDigits) {
        // No exponent digits were consumed: undo the separator and sign as well.
        segment.setOffset(startOffset);
    } else {
        // At least one exponent digit was consumed.
        result.flags |= FLAG_HAS_EXPONENT;
    }
    return digitsReturnValue;
}
/**
 * Quick pre-check: reports whether the segment starts with the exponent separator string,
 * as determined by StringSegment::startsWith().
 */
bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
    const bool beginsWithSeparator = segment.startsWith(fExponentSeparatorString);
    return beginsWithSeparator;
}
/** Returns the fixed label "<Scientific>" for this matcher. */
UnicodeString ScientificMatcher::toString() const {
    return UnicodeString(u"<Scientific>");
}
#endif /* #if !UCONFIG_NO_FORMATTING */