| /* |
| ****************************************************************************** |
| * Copyright (C) 2014, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ****************************************************************************** |
| * simplepatternformatter.cpp |
| */ |
| #include "simplepatternformatter.h" |
| #include "cstring.h" |
| #include "uassert.h" |
| |
| U_NAMESPACE_BEGIN |
| |
| static UBool isInvalidArray(const void *array, int32_t size) { |
| return (size < 0 || (size > 0 && array == NULL)); |
| } |
| |
// States of the pattern parser in SimplePatternFormatter::compile().
enum SimplePatternFormatterCompileState {
    INIT,         // ordinary text
    APOSTROPHE,   // just consumed an apostrophe
    PLACEHOLDER   // inside an opening curly brace, reading the ID digits
};
| |
| // Handles parsing placeholders in the pattern string, e.g {4} or {35} |
| class SimplePatternFormatterIdBuilder { |
| public: |
| SimplePatternFormatterIdBuilder() : id(0), idLen(0) { } |
| ~SimplePatternFormatterIdBuilder() { } |
| |
| // Resets so that this object has seen no placeholder ID. |
| void reset() { id = 0; idLen = 0; } |
| |
| // Returns the numeric placeholder ID parsed so far |
| int32_t getId() const { return id; } |
| |
| // Appends the numeric placeholder ID parsed so far back to a |
| // UChar buffer. Used to recover if parser using this object finds |
| // no closing curly brace. |
| void appendTo(UChar *buffer, int32_t *len) const; |
| |
| // Returns true if this object has seen a placeholder ID. |
| UBool isValid() const { return (idLen > 0); } |
| |
| // Processes a single digit character. Pattern string parser calls this |
| // as it processes digits after an opening curly brace. |
| void add(UChar ch); |
| private: |
| int32_t id; |
| int32_t idLen; |
| SimplePatternFormatterIdBuilder( |
| const SimplePatternFormatterIdBuilder &other); |
| SimplePatternFormatterIdBuilder &operator=( |
| const SimplePatternFormatterIdBuilder &other); |
| }; |
| |
| void SimplePatternFormatterIdBuilder::appendTo( |
| UChar *buffer, int32_t *len) const { |
| int32_t origLen = *len; |
| int32_t kId = id; |
| for (int32_t i = origLen + idLen - 1; i >= origLen; i--) { |
| int32_t digit = kId % 10; |
| buffer[i] = digit + 0x30; |
| kId /= 10; |
| } |
| *len = origLen + idLen; |
| } |
| |
| void SimplePatternFormatterIdBuilder::add(UChar ch) { |
| id = id * 10 + (ch - 0x30); |
| idLen++; |
| } |
| |
| // Represents placeholder values. |
| class SimplePatternFormatterPlaceholderValues : public UMemory { |
| public: |
| SimplePatternFormatterPlaceholderValues( |
| const UnicodeString * const *values, |
| int32_t valuesCount); |
| |
| // Returns TRUE if appendTo value is at any index besides exceptIndex. |
| UBool isAppendToInAnyIndexExcept( |
| const UnicodeString &appendTo, int32_t exceptIndex) const; |
| |
| // For each appendTo value, stores the snapshot of it in its place. |
| void snapshotAppendTo(const UnicodeString &appendTo); |
| |
| // Returns the placeholder value at index. No range checking performed. |
| // Returned reference is valid for as long as this object exists. |
| const UnicodeString &get(int32_t index) const; |
| private: |
| const UnicodeString * const *fValues; |
| int32_t fValuesCount; |
| const UnicodeString *fAppendTo; |
| UnicodeString fAppendToCopy; |
| SimplePatternFormatterPlaceholderValues( |
| const SimplePatternFormatterPlaceholderValues &); |
| SimplePatternFormatterPlaceholderValues &operator=( |
| const SimplePatternFormatterPlaceholderValues &); |
| }; |
| |
| SimplePatternFormatterPlaceholderValues::SimplePatternFormatterPlaceholderValues( |
| const UnicodeString * const *values, |
| int32_t valuesCount) |
| : fValues(values), |
| fValuesCount(valuesCount), |
| fAppendTo(NULL), |
| fAppendToCopy() { |
| } |
| |
| UBool SimplePatternFormatterPlaceholderValues::isAppendToInAnyIndexExcept( |
| const UnicodeString &appendTo, int32_t exceptIndex) const { |
| for (int32_t i = 0; i < fValuesCount; ++i) { |
| if (i != exceptIndex && fValues[i] == &appendTo) { |
| return TRUE; |
| } |
| } |
| return FALSE; |
| } |
| |
| void SimplePatternFormatterPlaceholderValues::snapshotAppendTo( |
| const UnicodeString &appendTo) { |
| fAppendTo = &appendTo; |
| fAppendToCopy = appendTo; |
| } |
| |
| const UnicodeString &SimplePatternFormatterPlaceholderValues::get( |
| int32_t index) const { |
| if (fAppendTo == NULL || fAppendTo != fValues[index]) { |
| return *fValues[index]; |
| } |
| return fAppendToCopy; |
| } |
| |
| SimplePatternFormatter::SimplePatternFormatter() : |
| noPlaceholders(), |
| placeholders(), |
| placeholderSize(0), |
| placeholderCount(0), |
| firstPlaceholderReused(FALSE) { |
| } |
| |
| SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) : |
| noPlaceholders(), |
| placeholders(), |
| placeholderSize(0), |
| placeholderCount(0), |
| firstPlaceholderReused(FALSE) { |
| UErrorCode status = U_ZERO_ERROR; |
| compile(pattern, status); |
| } |
| |
| SimplePatternFormatter::SimplePatternFormatter( |
| const SimplePatternFormatter &other) : |
| noPlaceholders(other.noPlaceholders), |
| placeholders(), |
| placeholderSize(0), |
| placeholderCount(other.placeholderCount), |
| firstPlaceholderReused(other.firstPlaceholderReused) { |
| placeholderSize = ensureCapacity(other.placeholderSize); |
| uprv_memcpy( |
| placeholders.getAlias(), |
| other.placeholders.getAlias(), |
| placeholderSize * sizeof(PlaceholderInfo)); |
| } |
| |
| SimplePatternFormatter &SimplePatternFormatter::operator=( |
| const SimplePatternFormatter& other) { |
| if (this == &other) { |
| return *this; |
| } |
| noPlaceholders = other.noPlaceholders; |
| placeholderSize = ensureCapacity(other.placeholderSize); |
| placeholderCount = other.placeholderCount; |
| firstPlaceholderReused = other.firstPlaceholderReused; |
| uprv_memcpy( |
| placeholders.getAlias(), |
| other.placeholders.getAlias(), |
| placeholderSize * sizeof(PlaceholderInfo)); |
| return *this; |
| } |
| |
| SimplePatternFormatter::~SimplePatternFormatter() { |
| } |
| |
| UBool SimplePatternFormatter::compile( |
| const UnicodeString &pattern, UErrorCode &status) { |
| if (U_FAILURE(status)) { |
| return FALSE; |
| } |
| const UChar *patternBuffer = pattern.getBuffer(); |
| int32_t patternLength = pattern.length(); |
| UChar *buffer = noPlaceholders.getBuffer(patternLength); |
| int32_t len = 0; |
| placeholderSize = 0; |
| placeholderCount = 0; |
| SimplePatternFormatterCompileState state = INIT; |
| SimplePatternFormatterIdBuilder idBuilder; |
| for (int32_t i = 0; i < patternLength; ++i) { |
| UChar ch = patternBuffer[i]; |
| switch (state) { |
| case INIT: |
| if (ch == 0x27) { |
| state = APOSTROPHE; |
| } else if (ch == 0x7B) { |
| state = PLACEHOLDER; |
| idBuilder.reset(); |
| } else { |
| buffer[len++] = ch; |
| } |
| break; |
| case APOSTROPHE: |
| if (ch == 0x27) { |
| buffer[len++] = 0x27; |
| } else if (ch == 0x7B) { |
| buffer[len++] = 0x7B; |
| } else { |
| buffer[len++] = 0x27; |
| buffer[len++] = ch; |
| } |
| state = INIT; |
| break; |
| case PLACEHOLDER: |
| if (ch >= 0x30 && ch <= 0x39) { |
| idBuilder.add(ch); |
| } else if (ch == 0x7D && idBuilder.isValid()) { |
| if (!addPlaceholder(idBuilder.getId(), len)) { |
| status = U_MEMORY_ALLOCATION_ERROR; |
| return FALSE; |
| } |
| state = INIT; |
| } else { |
| buffer[len++] = 0x7B; |
| idBuilder.appendTo(buffer, &len); |
| buffer[len++] = ch; |
| state = INIT; |
| } |
| break; |
| default: |
| U_ASSERT(FALSE); |
| break; |
| } |
| } |
| switch (state) { |
| case INIT: |
| break; |
| case APOSTROPHE: |
| buffer[len++] = 0x27; |
| break; |
| case PLACEHOLDER: |
| buffer[len++] = 0X7B; |
| idBuilder.appendTo(buffer, &len); |
| break; |
| default: |
| U_ASSERT(false); |
| break; |
| } |
| noPlaceholders.releaseBuffer(len); |
| return TRUE; |
| } |
| |
| UnicodeString& SimplePatternFormatter::format( |
| const UnicodeString &arg0, |
| UnicodeString &appendTo, |
| UErrorCode &status) const { |
| const UnicodeString *params[] = {&arg0}; |
| return formatAndAppend( |
| params, |
| UPRV_LENGTHOF(params), |
| appendTo, |
| NULL, |
| 0, |
| status); |
| } |
| |
| UnicodeString& SimplePatternFormatter::format( |
| const UnicodeString &arg0, |
| const UnicodeString &arg1, |
| UnicodeString &appendTo, |
| UErrorCode &status) const { |
| const UnicodeString *params[] = {&arg0, &arg1}; |
| return formatAndAppend( |
| params, |
| UPRV_LENGTHOF(params), |
| appendTo, |
| NULL, |
| 0, |
| status); |
| } |
| |
| UnicodeString& SimplePatternFormatter::format( |
| const UnicodeString &arg0, |
| const UnicodeString &arg1, |
| const UnicodeString &arg2, |
| UnicodeString &appendTo, |
| UErrorCode &status) const { |
| const UnicodeString *params[] = {&arg0, &arg1, &arg2}; |
| return formatAndAppend( |
| params, |
| UPRV_LENGTHOF(params), |
| appendTo, |
| NULL, |
| 0, |
| status); |
| } |
| |
// Records placeholderOffset for placeholderId in offsetArray, provided the
// ID falls below offsetArrayLength. IDs at or beyond the length are
// silently ignored.
static void updatePlaceholderOffset(
        int32_t placeholderId,
        int32_t placeholderOffset,
        int32_t *offsetArray,
        int32_t offsetArrayLength) {
    if (placeholderId >= offsetArrayLength) {
        return;
    }
    offsetArray[placeholderId] = placeholderOffset;
}
| |
| static void appendRange( |
| const UnicodeString &src, |
| int32_t start, |
| int32_t end, |
| UnicodeString &dest) { |
| // This check improves performance significantly. |
| if (start == end) { |
| return; |
| } |
| dest.append(src, start, end - start); |
| } |
| |
| UnicodeString& SimplePatternFormatter::formatAndAppend( |
| const UnicodeString * const *placeholderValues, |
| int32_t placeholderValueCount, |
| UnicodeString &appendTo, |
| int32_t *offsetArray, |
| int32_t offsetArrayLength, |
| UErrorCode &status) const { |
| if (U_FAILURE(status)) { |
| return appendTo; |
| } |
| if (isInvalidArray(placeholderValues, placeholderValueCount) |
| || isInvalidArray(offsetArray, offsetArrayLength)) { |
| status = U_ILLEGAL_ARGUMENT_ERROR; |
| return appendTo; |
| } |
| if (placeholderValueCount < placeholderCount) { |
| status = U_ILLEGAL_ARGUMENT_ERROR; |
| return appendTo; |
| } |
| |
| // Since we are disallowing parameter values that are the same as |
| // appendTo, we have to check all placeholderValues as opposed to |
| // the first placeholderCount placeholder values. |
| SimplePatternFormatterPlaceholderValues values( |
| placeholderValues, placeholderValueCount); |
| if (values.isAppendToInAnyIndexExcept(appendTo, -1)) { |
| status = U_ILLEGAL_ARGUMENT_ERROR; |
| return appendTo; |
| } |
| return formatAndAppend( |
| values, |
| appendTo, |
| offsetArray, |
| offsetArrayLength); |
| } |
| |
| UnicodeString& SimplePatternFormatter::formatAndReplace( |
| const UnicodeString * const *placeholderValues, |
| int32_t placeholderValueCount, |
| UnicodeString &result, |
| int32_t *offsetArray, |
| int32_t offsetArrayLength, |
| UErrorCode &status) const { |
| if (U_FAILURE(status)) { |
| return result; |
| } |
| if (isInvalidArray(placeholderValues, placeholderValueCount) |
| || isInvalidArray(offsetArray, offsetArrayLength)) { |
| status = U_ILLEGAL_ARGUMENT_ERROR; |
| return result; |
| } |
| if (placeholderValueCount < placeholderCount) { |
| status = U_ILLEGAL_ARGUMENT_ERROR; |
| return result; |
| } |
| SimplePatternFormatterPlaceholderValues values( |
| placeholderValues, placeholderCount); |
| int32_t placeholderAtStart = getUniquePlaceholderAtStart(); |
| |
| // If pattern starts with a unique placeholder and that placeholder |
| // value is result, we may be able to optimize by just appending to result. |
| if (placeholderAtStart >= 0 |
| && placeholderValues[placeholderAtStart] == &result) { |
| |
| // If result is the value for other placeholders, call off optimization. |
| if (values.isAppendToInAnyIndexExcept(result, placeholderAtStart)) { |
| values.snapshotAppendTo(result); |
| result.remove(); |
| return formatAndAppend( |
| values, |
| result, |
| offsetArray, |
| offsetArrayLength); |
| } |
| |
| // Otherwise we can optimize |
| formatAndAppend( |
| values, |
| result, |
| offsetArray, |
| offsetArrayLength); |
| |
| // We have to make the offset for the placeholderAtStart |
| // placeholder be 0. Otherwise it would be the length of the |
| // previous value of result. |
| if (offsetArrayLength > placeholderAtStart) { |
| offsetArray[placeholderAtStart] = 0; |
| } |
| return result; |
| } |
| if (values.isAppendToInAnyIndexExcept(result, -1)) { |
| values.snapshotAppendTo(result); |
| } |
| result.remove(); |
| return formatAndAppend( |
| values, |
| result, |
| offsetArray, |
| offsetArrayLength); |
| } |
| |
| UnicodeString& SimplePatternFormatter::formatAndAppend( |
| const SimplePatternFormatterPlaceholderValues &values, |
| UnicodeString &appendTo, |
| int32_t *offsetArray, |
| int32_t offsetArrayLength) const { |
| for (int32_t i = 0; i < offsetArrayLength; ++i) { |
| offsetArray[i] = -1; |
| } |
| if (placeholderSize == 0) { |
| appendTo.append(noPlaceholders); |
| return appendTo; |
| } |
| appendRange( |
| noPlaceholders, |
| 0, |
| placeholders[0].offset, |
| appendTo); |
| updatePlaceholderOffset( |
| placeholders[0].id, |
| appendTo.length(), |
| offsetArray, |
| offsetArrayLength); |
| const UnicodeString *placeholderValue = &values.get(placeholders[0].id); |
| if (placeholderValue != &appendTo) { |
| appendTo.append(*placeholderValue); |
| } |
| for (int32_t i = 1; i < placeholderSize; ++i) { |
| appendRange( |
| noPlaceholders, |
| placeholders[i - 1].offset, |
| placeholders[i].offset, |
| appendTo); |
| updatePlaceholderOffset( |
| placeholders[i].id, |
| appendTo.length(), |
| offsetArray, |
| offsetArrayLength); |
| placeholderValue = &values.get(placeholders[i].id); |
| if (placeholderValue != &appendTo) { |
| appendTo.append(*placeholderValue); |
| } |
| } |
| appendRange( |
| noPlaceholders, |
| placeholders[placeholderSize - 1].offset, |
| noPlaceholders.length(), |
| appendTo); |
| return appendTo; |
| } |
| |
| int32_t SimplePatternFormatter::getUniquePlaceholderAtStart() const { |
| if (placeholderSize == 0 |
| || firstPlaceholderReused || placeholders[0].offset != 0) { |
| return -1; |
| } |
| return placeholders[0].id; |
| } |
| |
| int32_t SimplePatternFormatter::ensureCapacity( |
| int32_t desiredCapacity, int32_t allocationSize) { |
| if (allocationSize < desiredCapacity) { |
| allocationSize = desiredCapacity; |
| } |
| if (desiredCapacity <= placeholders.getCapacity()) { |
| return desiredCapacity; |
| } |
| // allocate new buffer |
| if (placeholders.resize(allocationSize, placeholderSize) == NULL) { |
| return placeholders.getCapacity(); |
| } |
| return desiredCapacity; |
| } |
| |
| UBool SimplePatternFormatter::addPlaceholder(int32_t id, int32_t offset) { |
| if (ensureCapacity(placeholderSize + 1, 2 * placeholderSize) < placeholderSize + 1) { |
| return FALSE; |
| } |
| ++placeholderSize; |
| PlaceholderInfo *placeholderEnd = &placeholders[placeholderSize - 1]; |
| placeholderEnd->offset = offset; |
| placeholderEnd->id = id; |
| if (id >= placeholderCount) { |
| placeholderCount = id + 1; |
| } |
| if (placeholderSize > 1 |
| && placeholders[placeholderSize - 1].id == placeholders[0].id) { |
| firstPlaceholderReused = TRUE; |
| } |
| return TRUE; |
| } |
| |
| U_NAMESPACE_END |