// blob: 201267e635cd6a90e1b712646dc67dba7092be72 [file] [log] [blame]
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#ifndef __SOURCE_NUMBER_SKELETONS_H__
#define __SOURCE_NUMBER_SKELETONS_H__
#include "number_types.h"
#include "numparse_types.h"
#include "unicode/ucharstrie.h"
#include "string_segment.h"
U_NAMESPACE_BEGIN
namespace number {
namespace impl {
// Forward-declaration
struct SeenMacroProps;
// namespace for enums and entrypoint functions
namespace skeleton {
////////////////////////////////////////////////////////////////////////////////////////
// NOTE: For examples of how to add a new stem to the number skeleton parser, see: //
// https://github.com/unicode-org/icu/commit/a2a7982216b2348070dc71093775ac7195793d73 //
// and //
// https://github.com/unicode-org/icu/commit/6fe86f3934a8a5701034f648a8f7c5087e84aa28 //
////////////////////////////////////////////////////////////////////////////////////////
/**
 * Parser bookkeeping for skeleton parsing: records which kind of option, if any, is expected to
 * come next.
 */
enum ParseState {
    // Section 0: whitespace or a stem is expected next; an option is not.
    STATE_NULL = 0,

    // Section 1: an option may be accepted here, but none is required.
    STATE_SCIENTIFIC,
    STATE_FRACTION_PRECISION,

    // Section 2: an option is mandatory.
    STATE_INCREMENT_PRECISION,
    STATE_MEASURE_UNIT,
    STATE_PER_MEASURE_UNIT,
    STATE_IDENTIFIER_UNIT,
    STATE_UNIT_USAGE,
    STATE_CURRENCY_UNIT,
    STATE_INTEGER_WIDTH,
    STATE_NUMBERING_SYSTEM,
    STATE_SCALE,
};
/**
 * One enumerator for every possible stem literal. An enumerator's name is its kebab-case stem
 * string literal rewritten in upper snake case.
 *
 * @see stem_to_object
 * @see #SERIALIZED_STEM_TRIE
 */
enum StemEnum {
    // Section 1: stems that are complete on their own (no option required).
    STEM_COMPACT_SHORT = 0,
    STEM_COMPACT_LONG,
    STEM_SCIENTIFIC,
    STEM_ENGINEERING,
    STEM_NOTATION_SIMPLE,

    STEM_BASE_UNIT,
    STEM_PERCENT,
    STEM_PERMILLE,
    STEM_PERCENT_100, // concise-only

    STEM_PRECISION_INTEGER,
    STEM_PRECISION_UNLIMITED,
    STEM_PRECISION_CURRENCY_STANDARD,
    STEM_PRECISION_CURRENCY_CASH,

    STEM_ROUNDING_MODE_CEILING,
    STEM_ROUNDING_MODE_FLOOR,
    STEM_ROUNDING_MODE_DOWN,
    STEM_ROUNDING_MODE_UP,
    STEM_ROUNDING_MODE_HALF_EVEN,
    STEM_ROUNDING_MODE_HALF_DOWN,
    STEM_ROUNDING_MODE_HALF_UP,
    STEM_ROUNDING_MODE_UNNECESSARY,

    STEM_GROUP_OFF,
    STEM_GROUP_MIN2,
    STEM_GROUP_AUTO,
    STEM_GROUP_ON_ALIGNED,
    STEM_GROUP_THOUSANDS,

    STEM_LATIN,

    STEM_UNIT_WIDTH_NARROW,
    STEM_UNIT_WIDTH_SHORT,
    STEM_UNIT_WIDTH_FULL_NAME,
    STEM_UNIT_WIDTH_ISO_CODE,
    STEM_UNIT_WIDTH_FORMAL,
    STEM_UNIT_WIDTH_VARIANT,
    STEM_UNIT_WIDTH_HIDDEN,

    STEM_SIGN_AUTO,
    STEM_SIGN_ALWAYS,
    STEM_SIGN_NEVER,
    STEM_SIGN_ACCOUNTING,
    STEM_SIGN_ACCOUNTING_ALWAYS,
    STEM_SIGN_EXCEPT_ZERO,
    STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,

    STEM_DECIMAL_AUTO,
    STEM_DECIMAL_ALWAYS,

    // Section 2: stems that must be followed by an option.
    STEM_PRECISION_INCREMENT,
    STEM_MEASURE_UNIT,
    STEM_PER_MEASURE_UNIT,
    STEM_UNIT,
    STEM_UNIT_USAGE,
    STEM_CURRENCY,
    STEM_INTEGER_WIDTH,
    STEM_NUMBERING_SYSTEM,
    STEM_SCALE,
};
/** Default wildcard char: accepted on input and printed in output. */
constexpr char16_t kWildcardChar = u'*';

/** Alternative wildcard char: accepted on input but not printed in output. */
constexpr char16_t kAltWildcardChar = u'+';

/**
 * Tells whether a char counts as a wildcard on input, i.e. whether it equals kWildcardChar or
 * kAltWildcardChar.
 */
inline bool isWildcardChar(char16_t c) {
    switch (c) {
        case kWildcardChar:
        case kAltWildcardChar:
            return true;
        default:
            return false;
    }
}
/**
* Creates a NumberFormatter corresponding to the given skeleton string.
*
* @param skeletonString
* A number skeleton string, possibly not in its shortest form.
* @param perror
* Pointer to a UParseError. NOTE(review): presumably filled with error-location details
* when the skeleton is invalid, and possibly allowed to be null -- confirm against the
* implementation.
* @param status
* Error code reference. NOTE(review): presumably set to a failure code when the skeleton
* cannot be parsed -- confirm against the implementation.
* @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string.
*/
UnlocalizedNumberFormatter create(
const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status);
/**
* Create a skeleton string corresponding to the given NumberFormatter.
*
* @param macros
* The NumberFormatter options object.
* @param status
* Error code reference. NOTE(review): presumably set to a failure code when the options
* cannot be represented as a skeleton -- confirm against the implementation.
* @return A skeleton string in normalized form.
*/
UnicodeString generate(const MacroProps& macros, UErrorCode& status);
/**
* Converts from a skeleton string to a MacroProps. This method contains the primary parse loop.
*
* Internal: use the create() endpoint instead of this function.
*
* @param skeletonString
* The skeleton string to parse.
* @param errOffset
* Output offset. NOTE(review): presumably the position within skeletonString at which a
* parse failure was detected -- confirm against the implementation.
* @param status
* Error code reference. NOTE(review): presumably set to a failure code on a parse error.
* @return The MacroProps produced from the skeleton string.
*/
MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status);
/**
* Given that the current segment represents a stem, parse it and save the result.
*
* @param segment
* The segment holding the stem.
* @param stemTrie
* Trie used to look up the stem. NOTE(review): presumably maps stem literals to StemEnum
* values -- confirm against the implementation.
* @param seen
* Flags, one per macro-prop (see SeenMacroProps). NOTE(review): presumably used to detect
* a property being specified twice -- confirm against the implementation.
* @param macros
* The MacroProps that receives the parsed result.
* @param status
* Error code reference. NOTE(review): presumably set to a failure code on an invalid stem.
* @return The next state after parsing this stem, corresponding to what subset of options to expect.
*/
ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen,
MacroProps& macros, UErrorCode& status);
/**
* Given that the current segment represents an option, parse it and save the result.
*
* @param stem
* The parse state in effect for this option. NOTE(review): presumably the state returned
* by parseStem() for the preceding stem -- confirm against the caller.
* @param segment
* The segment holding the option.
* @param macros
* The MacroProps that receives the parsed result.
* @param status
* Error code reference. NOTE(review): presumably set to a failure code on an invalid option.
* @return The next state after parsing this option, corresponding to what subset of options to
* expect next.
*/
ParseState
parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status);
} // namespace skeleton
/**
* Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This
* applies to only the "Section 1" stems, those that are well-defined without an option.
*
* Each function takes a stem and returns the object or enum value of the type named by its return
* type. NOTE(review): what a function does when handed a stem outside its own family is not visible
* in this header -- check the implementation before relying on it.
*/
namespace stem_to_object {
// Stem -> Notation.
Notation notation(skeleton::StemEnum stem);
// Stem -> MeasureUnit.
MeasureUnit unit(skeleton::StemEnum stem);
// Stem -> Precision.
Precision precision(skeleton::StemEnum stem);
// Stem -> UNumberFormatRoundingMode.
UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem);
// Stem -> UNumberGroupingStrategy.
UNumberGroupingStrategy groupingStrategy(skeleton::StemEnum stem);
// Stem -> UNumberUnitWidth.
UNumberUnitWidth unitWidth(skeleton::StemEnum stem);
// Stem -> UNumberSignDisplay.
UNumberSignDisplay signDisplay(skeleton::StemEnum stem);
// Stem -> UNumberDecimalSeparatorDisplay.
UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem);
} // namespace stem_to_object
/**
* Namespace for utility methods that convert from enums to stem strings. More complex object conversions
* take place in the object_to_stem_string namespace.
*
* Every function takes the enum value to convert plus a UnicodeString `sb` and returns nothing.
* NOTE(review): presumably the stem string is appended to `sb` -- confirm against the implementation.
*/
namespace enum_to_stem_string {
// UNumberFormatRoundingMode -> stem string.
void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb);
// UNumberGroupingStrategy -> stem string.
void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb);
// UNumberUnitWidth -> stem string.
void unitWidth(UNumberUnitWidth value, UnicodeString& sb);
// UNumberSignDisplay -> stem string.
void signDisplay(UNumberSignDisplay value, UnicodeString& sb);
// UNumberDecimalSeparatorDisplay -> stem string.
void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb);
} // namespace enum_to_stem_string
/**
* Namespace for utility methods for processing stems and options that cannot be interpreted literally.
*
* Naming convention visible in the declarations below:
* - parseXxx functions take the StringSegment to interpret and the MacroProps to update; the ones
* returning bool report whether the option was found and parsed.
* - generateXxx functions take the value(s) to serialize and a UnicodeString `sb`.
* NOTE(review): presumably the generated text is appended to `sb` -- confirm against the
* implementation.
* All functions take a UErrorCode reference for error reporting.
*/
namespace blueprint_helpers {
/** @return Whether we successfully found and parsed an exponent width option. */
bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status);
/** @return Whether we successfully found and parsed an exponent sign option. */
bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status);
// "measure-unit/" is deprecated in favour of "unit/".
void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
// "per-measure-unit/" is deprecated in favour of "unit/".
void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
/**
* Parses unit identifiers like "meter-per-second" and "foot-and-inch", as
* specified via a "unit/" concise skeleton.
*/
void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
void parseUnitUsageOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status);
void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status);
void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
// Note: no generateScientificStem since this syntax was added later in ICU 67
void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
// Note: no generateIntegerStem since this syntax was added later in ICU 67
/** @return Whether we successfully found and parsed a frac-sig option. */
bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
void
generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status);
void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status);
void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status);
void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb,
UErrorCode& status);
} // namespace blueprint_helpers
/**
* Class for utility methods for generating a token corresponding to each macro-prop. Each method
* returns whether or not a token was written to the string builder.
*
* This needs to be a class, not a namespace, so it can be friended.
*/
class GeneratorHelpers {
public:
/**
* Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given
* UnicodeString `sb`.
*
* Internal: use the skeleton::generate() endpoint instead of this function.
*/
static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
private:
// One helper per macro-prop. Each takes the MacroProps to read, the UnicodeString to write to,
// and an error code, and returns whether a token was written (see the class comment).
static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
static bool usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
};
/**
 * Boolean flags used for null-checking, one flag per macro-prop.
 * Java can simply test the object reference; C++ has no equivalent, so the flags live in this
 * separate struct. Every flag starts out false.
 */
struct SeenMacroProps {
    bool notation{false};
    bool unit{false};
    bool perUnit{false};
    bool usage{false};
    bool precision{false};
    bool roundingMode{false};
    bool grouper{false};
    bool padder{false};
    bool integerWidth{false};
    bool symbols{false};
    bool unitWidth{false};
    bool sign{false};
    bool decimal{false};
    bool scale{false};
};
namespace {

/**
 * Appends the code units [start, end) of `src` to `dest` by calling
 * `(dest).appendInvariantChars(...)`, and reports failures through `status`.
 *
 * @param dest   Object providing appendInvariantChars(); receives the converted characters.
 * @param src    Object providing getBuffer(); the text is read from getBuffer() + start.
 * @param start  Index of the first code unit to convert.
 * @param end    Index one past the last code unit to convert.
 * @param status Error code lvalue. It is left untouched on success. It is set to
 *               U_NUMBER_SKELETON_SYNTAX_ERROR when the conversion reports
 *               U_INVARIANT_CONVERSION_ERROR, and to the conversion's own error code for any
 *               other failure.
 *
 * WARNING: on any failure the macro executes `return;`, so it can only be used inside a function
 * (or lambda) that returns void.
 *
 * The whole expansion, including the `(void)(dest)` cast, sits inside one
 * UPRV_BLOCK_MACRO_BEGIN/END block so that the macro is a single statement. With the cast outside
 * the block, `if (cond) SKELETON_UCHAR_TO_CHAR(...);` guarded only the cast and always ran the
 * conversion.
 *
 * Note: preprocessor macros are not scoped by namespaces, so the surrounding unnamed namespace has
 * no effect on the macro's visibility.
 */
#define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) \
UPRV_BLOCK_MACRO_BEGIN { \
    (void)(dest); \
    UErrorCode conversionStatus = U_ZERO_ERROR; \
    (dest).appendInvariantChars({false, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \
    if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \
        /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \
        (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
        return; \
    } else if (U_FAILURE(conversionStatus)) { \
        (status) = conversionStatus; \
        return; \
    } \
} UPRV_BLOCK_MACRO_END

} // namespace
} // namespace impl
} // namespace number
U_NAMESPACE_END
#endif //__SOURCE_NUMBER_SKELETONS_H__
#endif /* #if !UCONFIG_NO_FORMATTING */