// third_party/icu/source/i18n/numparse_scientific.cpp - cobalt - Git at Google

 // © 2018 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html

 #include "unicode/utypes.h"

 #if !UCONFIG_NO_FORMATTING

 // Allow implicit conversion from char16_t* to UnicodeString for this file:
 // Helpful in toString methods and elsewhere.
 #define UNISTR_FROM_STRING_EXPLICIT

 #include "numparse_types.h"
 #include "numparse_scientific.h"
 #include "static_unicode_sets.h"
 #include "string_segment.h"

 using namespace icu;
 using namespace icu::numparse;
 using namespace icu::numparse::impl;


 namespace {

 // Returns the UnicodeSet that unisets::get() provides for the unisets::PLUS_SIGN key.
 inline const UnicodeSet& plusSignSet() {
     const UnicodeSet* const plusSigns = unisets::get(unisets::PLUS_SIGN);
     return *plusSigns;
 }

 // Returns the UnicodeSet that unisets::get() provides for the unisets::MINUS_SIGN key.
 inline const UnicodeSet& minusSignSet() {
     const UnicodeSet* const minusSigns = unisets::get(unisets::MINUS_SIGN);
     return *minusSigns;
 }

 } // namespace


 // Builds a matcher for the exponent part of a number written in scientific notation.
 //
 // The exponent separator is taken from dfs (kExponentialSymbol). Exponent digits are
 // delegated to fExponentMatcher, which is configured with PARSE_FLAG_INTEGER_ONLY and
 // PARSE_FLAG_GROUPING_DISABLED. Ignorables are handled by fIgnorablesMatcher, configured
 // with PARSE_FLAG_STRICT_IGNORABLES.
 //
 // The minus and plus sign symbols from dfs are stored in fCustomMinusSign / fCustomPlusSign
 // only when they are not already contained in minusSignSet() / plusSignSet(); otherwise the
 // corresponding field is set to bogus.
 ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
         : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
           fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
           fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {

     // Minus sign: remember it only if the standard minus-sign set does not cover it.
     const UnicodeString& symbolMinus = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
     if (!minusSignSet().contains(symbolMinus)) {
         fCustomMinusSign = symbolMinus;
     } else {
         fCustomMinusSign.setToBogus();
     }

     // Plus sign: same treatment as the minus sign.
     const UnicodeString& symbolPlus = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
     if (!plusSignSet().contains(symbolPlus)) {
         fCustomPlusSign = symbolPlus;
     } else {
         fCustomPlusSign.setToBogus();
     }
 }

 // Attempts to consume an exponent (separator, optional sign, digits) at the current position
 // of `segment`, after a mantissa has already been parsed into `result`.
 //
 // When fExponentMatcher advances the segment, FLAG_HAS_EXPONENT is set in result.flags and the
 // segment stays advanced. In every other case where the separator matched in full, the segment
 // offset is left at (or restored to) its value on entry.
 //
 // Returns false when no number has been seen yet, when an exponent was already parsed, or when
 // the separator does not match. Returns true when the segment ends before the exponent is
 // complete, or when a custom sign matches only partially (presumably signalling that more
 // input could extend the match; confirm against the NumberParseMatcher contract). Otherwise
 // returns whatever fExponentMatcher.match() returns.
 bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
     // Scientific notation is only accepted after the mantissa, and only once per string.
     if (!result.seenNumber() || (result.flags & FLAG_HAS_EXPONENT) != 0) {
         return false;
     }

     // Match the scientific separator first; a second number is expected after it.
     // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
     const int32_t startOffset = segment.getOffset();
     const int32_t separatorOverlap = segment.getCommonPrefixLength(fExponentSeparatorString);
     if (separatorOverlap != fExponentSeparatorString.length()) {
         // Either the whole remaining segment is a prefix of the separator (partial match,
         // report true) or the separator simply is not here (no match, report false).
         return separatorOverlap == segment.length();
     }

     // The full exponent separator matched. If nothing follows it, there is no code point
     // to look at, so report true without moving the offset.
     if (segment.length() == separatorOverlap) {
         return true;
     }
     segment.adjustOffset(separatorOverlap);

     // Ignorables are allowed before the sign.
     // Note: the ignorables matcher should not touch the result.
     fIgnorablesMatcher.match(segment, result, status);
     if (segment.length() == 0) {
         segment.setOffset(startOffset);
         return true;
     }

     // An optional sign may precede the exponent digits.
     int8_t exponentSign = 1;
     if (segment.startsWith(minusSignSet())) {
         exponentSign = -1;
         segment.adjustOffsetByCodePoint();
     } else if (segment.startsWith(plusSignSet())) {
         segment.adjustOffsetByCodePoint();
     } else {
         // Fall back to the custom signs; the minus sign is tested before the plus sign.
         const bool customMinus = segment.startsWith(fCustomMinusSign);
         if (customMinus || segment.startsWith(fCustomPlusSign)) {
             const UnicodeString& customSign = customMinus ? fCustomMinusSign : fCustomPlusSign;
             const int32_t signOverlap = segment.getCommonPrefixLength(customSign);
             if (signOverlap != customSign.length()) {
                 // Partial custom sign match
                 segment.setOffset(startOffset);
                 return true;
             }
             if (customMinus) {
                 exponentSign = -1;
             }
             segment.adjustOffset(signOverlap);
         }
     }

     // Nothing left after the sign: rewind and report true.
     if (segment.length() == 0) {
         segment.setOffset(startOffset);
         return true;
     }

     // Ignorables are allowed after the sign as well.
     // Note: the ignorables matcher should not touch the result.
     fIgnorablesMatcher.match(segment, result, status);
     if (segment.length() == 0) {
         segment.setOffset(startOffset);
         return true;
     }

     // We are supposed to accept E0 after NaN, so result.quantity must be made available
     // for the duration of the digit match; its bogus flag is put back afterwards.
     const bool quantityWasBogus = result.quantity.bogus;
     result.quantity.bogus = false;
     const int32_t offsetBeforeDigits = segment.getOffset();
     const bool digitsResult = fExponentMatcher.match(segment, result, exponentSign, status);
     result.quantity.bogus = quantityWasBogus;

     if (segment.getOffset() == offsetBeforeDigits) {
         // No exponent digits were matched
         segment.setOffset(startOffset);
     } else {
         // At least one exponent digit was matched.
         result.flags |= FLAG_HAS_EXPONENT;
     }
     return digitsResult;
 }

 // Quick pre-check: reports whether segment.startsWith() accepts the exponent separator string.
 bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
     const bool beginsWithSeparator = segment.startsWith(fExponentSeparatorString);
     return beginsWithSeparator;
 }

 // Returns a fixed label identifying this matcher.
 UnicodeString ScientificMatcher::toString() const {
     return UnicodeString(u"<Scientific>");
 }


 #endif /* #if !UCONFIG_NO_FORMATTING */
	// © 2018 and later: Unicode, Inc. and others.
	// License & terms of use: http://www.unicode.org/copyright.html

	#include "unicode/utypes.h"

	#if !UCONFIG_NO_FORMATTING

	// Allow implicit conversion from char16_t* to UnicodeString for this file:
	// Helpful in toString methods and elsewhere.
	#define UNISTR_FROM_STRING_EXPLICIT

	#include "numparse_types.h"
	#include "numparse_scientific.h"
	#include "static_unicode_sets.h"
	#include "string_segment.h"

	using namespace icu;
	using namespace icu::numparse;
	using namespace icu::numparse::impl;


	namespace {

	// Looks up the UnicodeSet registered under unisets::MINUS_SIGN and returns it by reference.
	inline const UnicodeSet& minusSignSet() {
	    const UnicodeSet& minusSigns = *unisets::get(unisets::MINUS_SIGN);
	    return minusSigns;
	}

	// Looks up the UnicodeSet registered under unisets::PLUS_SIGN and returns it by reference.
	inline const UnicodeSet& plusSignSet() {
	    const UnicodeSet& plusSigns = *unisets::get(unisets::PLUS_SIGN);
	    return plusSigns;
	}

	} // namespace


	// Constructs the matcher for the exponent portion of scientific notation.
	//
	// dfs supplies the exponent separator (kExponentialSymbol) and the minus/plus sign symbols.
	// dfs and grouper are forwarded to fExponentMatcher together with the flags
	// PARSE_FLAG_INTEGER_ONLY and PARSE_FLAG_GROUPING_DISABLED.
	//
	// A sign symbol is stored as a "custom" sign only when the corresponding sign set
	// (minusSignSet() / plusSignSet()) does not contain it; otherwise the custom field is
	// set to bogus.
	ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
	        : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
	          fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
	          fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {

	    const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
	    if (minusSignSet().contains(minusSign)) {
	        // Already covered by the minus-sign set; no custom sign is kept.
	        fCustomMinusSign.setToBogus();
	    } else {
	        fCustomMinusSign = minusSign;
	    }

	    const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
	    if (plusSignSet().contains(plusSign)) {
	        // Already covered by the plus-sign set; no custom sign is kept.
	        fCustomPlusSign.setToBogus();
	    } else {
	        fCustomPlusSign = plusSign;
	    }
	}

	// Tries to parse an exponent (separator, optional sign, digits) from the front of `segment`
	// once a mantissa is already present in `result`.
	//
	// If fExponentMatcher advances the segment, FLAG_HAS_EXPONENT is OR-ed into result.flags.
	// In all other cases where the separator matched in full, the segment offset ends up at its
	// value on entry.
	//
	// Returns false if no number has been seen, if an exponent was already parsed, or if the
	// separator does not match. Returns true when the input ends before the exponent is complete
	// or a custom sign matches only partially (presumably meaning more input could extend the
	// match; confirm against the NumberParseMatcher contract). Otherwise returns the value
	// returned by fExponentMatcher.match().
	bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
	    // Only accept scientific notation after the mantissa.
	    if (!result.seenNumber()) {
	        return false;
	    }

	    // Only accept one exponent per string.
	    if (0 != (result.flags & FLAG_HAS_EXPONENT)) {
	        return false;
	    }

	    // First match the scientific separator, and then match another number after it.
	    // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
	    int32_t initialOffset = segment.getOffset();
	    int32_t overlap = segment.getCommonPrefixLength(fExponentSeparatorString);
	    if (overlap == fExponentSeparatorString.length()) {
	        // Full exponent separator match.

	        // First attempt to get a code point, returning true if we can't get one.
	        if (segment.length() == overlap) {
	            return true;
	        }
	        segment.adjustOffset(overlap);

	        // Allow ignorables before the sign.
	        // Note: call site is guarded by the segment.length() check above.
	        // Note: the ignorables matcher should not touch the result.
	        fIgnorablesMatcher.match(segment, result, status);
	        if (segment.length() == 0) {
	            segment.setOffset(initialOffset);
	            return true;
	        }

	        // Allow a sign, and then try to match digits.
	        int8_t exponentSign = 1;
	        if (segment.startsWith(minusSignSet())) {
	            exponentSign = -1;
	            segment.adjustOffsetByCodePoint();
	        } else if (segment.startsWith(plusSignSet())) {
	            segment.adjustOffsetByCodePoint();
	        } else if (segment.startsWith(fCustomMinusSign)) {
	            overlap = segment.getCommonPrefixLength(fCustomMinusSign);
	            if (overlap != fCustomMinusSign.length()) {
	                // Partial custom sign match
	                segment.setOffset(initialOffset);
	                return true;
	            }
	            exponentSign = -1;
	            segment.adjustOffset(overlap);
	        } else if (segment.startsWith(fCustomPlusSign)) {
	            overlap = segment.getCommonPrefixLength(fCustomPlusSign);
	            if (overlap != fCustomPlusSign.length()) {
	                // Partial custom sign match
	                segment.setOffset(initialOffset);
	                return true;
	            }
	            segment.adjustOffset(overlap);
	        }

	        // Return true if the segment is empty.
	        if (segment.length() == 0) {
	            segment.setOffset(initialOffset);
	            return true;
	        }

	        // Allow ignorables after the sign.
	        // Note: call site is guarded by the segment.length() check above.
	        // Note: the ignorables matcher should not touch the result.
	        fIgnorablesMatcher.match(segment, result, status);
	        if (segment.length() == 0) {
	            segment.setOffset(initialOffset);
	            return true;
	        }

	        // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
	        // The bogus flag is cleared only for the duration of the digit match and then restored.
	        bool wasBogus = result.quantity.bogus;
	        result.quantity.bogus = false;
	        int32_t digitsOffset = segment.getOffset();
	        bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
	        result.quantity.bogus = wasBogus;

	        if (segment.getOffset() != digitsOffset) {
	            // At least one exponent digit was matched.
	            result.flags |= FLAG_HAS_EXPONENT;
	        } else {
	            // No exponent digits were matched
	            segment.setOffset(initialOffset);
	        }
	        return digitsReturnValue;

	    } else if (overlap == segment.length()) {
	        // Partial exponent separator match
	        return true;
	    }

	    // No match
	    return false;
	}

	// Cheap pre-filter: true when segment.startsWith() accepts fExponentSeparatorString.
	bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
	    if (segment.startsWith(fExponentSeparatorString)) {
	        return true;
	    }
	    return false;
	}

	// Produces the constant display name of this matcher.
	UnicodeString ScientificMatcher::toString() const {
	    UnicodeString label = u"<Scientific>";
	    return label;
	}


	#endif /* #if !UCONFIG_NO_FORMATTING */