| // © 2017 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| |
| // umutablecptrie.cpp (inspired by utrie2_builder.cpp) |
| // created: 2017dec29 Markus W. Scherer |
| |
| // #define UCPTRIE_DEBUG |
| #ifdef UCPTRIE_DEBUG |
| # include <stdio.h> |
| #endif |
| |
| #include "unicode/utypes.h" |
| #include "unicode/ucptrie.h" |
| #include "unicode/umutablecptrie.h" |
| #include "unicode/uobject.h" |
| #include "unicode/utf16.h" |
| #include "cmemory.h" |
| #include "uassert.h" |
| #include "ucptrie_impl.h" |
| |
| // ICU-20235 In case Microsoft math.h has defined this, undefine it. |
| #ifdef OVERFLOW |
| #undef OVERFLOW |
| #endif |
| |
| U_NAMESPACE_BEGIN |
| |
| namespace { |
| |
| constexpr int32_t MAX_UNICODE = 0x10ffff; |
| |
| constexpr int32_t UNICODE_LIMIT = 0x110000; |
| constexpr int32_t BMP_LIMIT = 0x10000; |
| constexpr int32_t ASCII_LIMIT = 0x80; |
| |
| constexpr int32_t I_LIMIT = UNICODE_LIMIT >> UCPTRIE_SHIFT_3; |
| constexpr int32_t BMP_I_LIMIT = BMP_LIMIT >> UCPTRIE_SHIFT_3; |
| constexpr int32_t ASCII_I_LIMIT = ASCII_LIMIT >> UCPTRIE_SHIFT_3; |
| |
| constexpr int32_t SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3)); |
| |
| // Flag values for data blocks. |
| constexpr uint8_t ALL_SAME = 0; |
| constexpr uint8_t MIXED = 1; |
| constexpr uint8_t SAME_AS = 2; |
| |
| /** Start with allocation of 16k data entries. */ |
| constexpr int32_t INITIAL_DATA_LENGTH = ((int32_t)1 << 14); |
| |
| /** Grow about 8x each time. */ |
| constexpr int32_t MEDIUM_DATA_LENGTH = ((int32_t)1 << 17); |
| |
| /** |
| * Maximum length of the build-time data array. |
| * One entry per 0x110000 code points. |
| */ |
| constexpr int32_t MAX_DATA_LENGTH = UNICODE_LIMIT; |
| |
| // Flag values for index-3 blocks while compacting/building. |
| constexpr uint8_t I3_NULL = 0; |
| constexpr uint8_t I3_BMP = 1; |
| constexpr uint8_t I3_16 = 2; |
| constexpr uint8_t I3_18 = 3; |
| |
| constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8; |
| |
| class AllSameBlocks; |
| class MixedBlocks; |
| |
| class MutableCodePointTrie : public UMemory { |
| public: |
| MutableCodePointTrie(uint32_t initialValue, uint32_t errorValue, UErrorCode &errorCode); |
| MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode); |
| MutableCodePointTrie(const MutableCodePointTrie &other) = delete; |
| ~MutableCodePointTrie(); |
| |
| MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete; |
| |
| static MutableCodePointTrie *fromUCPMap(const UCPMap *map, UErrorCode &errorCode); |
| static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode); |
| |
| uint32_t get(UChar32 c) const; |
| int32_t getRange(UChar32 start, UCPMapValueFilter *filter, const void *context, |
| uint32_t *pValue) const; |
| |
| void set(UChar32 c, uint32_t value, UErrorCode &errorCode); |
| void setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode); |
| |
| UCPTrie *build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode); |
| |
| private: |
| void clear(); |
| |
| bool ensureHighStart(UChar32 c); |
| int32_t allocDataBlock(int32_t blockLength); |
| int32_t getDataBlock(int32_t i); |
| |
| void maskValues(uint32_t mask); |
| UChar32 findHighStart() const; |
| int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks); |
| int32_t compactData( |
| int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity, |
| int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode); |
| int32_t compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, UErrorCode &errorCode); |
| int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode); |
| |
| uint32_t *index = nullptr; |
| int32_t indexCapacity = 0; |
| int32_t index3NullOffset = -1; |
| uint32_t *data = nullptr; |
| int32_t dataCapacity = 0; |
| int32_t dataLength = 0; |
| int32_t dataNullOffset = -1; |
| |
| uint32_t origInitialValue; |
| uint32_t initialValue; |
| uint32_t errorValue; |
| UChar32 highStart; |
| uint32_t highValue; |
| #ifdef UCPTRIE_DEBUG |
| public: |
| const char *name; |
| #endif |
| private: |
| /** Temporary array while building the final data. */ |
| uint16_t *index16 = nullptr; |
| uint8_t flags[UNICODE_LIMIT >> UCPTRIE_SHIFT_3]; |
| }; |
| |
| MutableCodePointTrie::MutableCodePointTrie(uint32_t iniValue, uint32_t errValue, UErrorCode &errorCode) : |
| origInitialValue(iniValue), initialValue(iniValue), errorValue(errValue), |
| highStart(0), highValue(initialValue) |
| #ifdef UCPTRIE_DEBUG |
| , name("open") |
| #endif |
| { |
| if (U_FAILURE(errorCode)) { return; } |
| index = (uint32_t *)uprv_malloc(BMP_I_LIMIT * 4); |
| data = (uint32_t *)uprv_malloc(INITIAL_DATA_LENGTH * 4); |
| if (index == nullptr || data == nullptr) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return; |
| } |
| indexCapacity = BMP_I_LIMIT; |
| dataCapacity = INITIAL_DATA_LENGTH; |
| } |
| |
| MutableCodePointTrie::MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode) : |
| index3NullOffset(other.index3NullOffset), |
| dataNullOffset(other.dataNullOffset), |
| origInitialValue(other.origInitialValue), initialValue(other.initialValue), |
| errorValue(other.errorValue), |
| highStart(other.highStart), highValue(other.highValue) |
| #ifdef UCPTRIE_DEBUG |
| , name("mutable clone") |
| #endif |
| { |
| if (U_FAILURE(errorCode)) { return; } |
| int32_t iCapacity = highStart <= BMP_LIMIT ? BMP_I_LIMIT : I_LIMIT; |
| index = (uint32_t *)uprv_malloc(iCapacity * 4); |
| data = (uint32_t *)uprv_malloc(other.dataCapacity * 4); |
| if (index == nullptr || data == nullptr) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return; |
| } |
| indexCapacity = iCapacity; |
| dataCapacity = other.dataCapacity; |
| |
| int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; |
| uprv_memcpy(flags, other.flags, iLimit); |
| uprv_memcpy(index, other.index, iLimit * 4); |
| uprv_memcpy(data, other.data, (size_t)other.dataLength * 4); |
| dataLength = other.dataLength; |
| U_ASSERT(other.index16 == nullptr); |
| } |
| |
| MutableCodePointTrie::~MutableCodePointTrie() { |
| uprv_free(index); |
| uprv_free(data); |
| uprv_free(index16); |
| } |
| |
| MutableCodePointTrie *MutableCodePointTrie::fromUCPMap(const UCPMap *map, UErrorCode &errorCode) { |
| // Use the highValue as the initialValue to reduce the highStart. |
| uint32_t errorValue = ucpmap_get(map, -1); |
| uint32_t initialValue = ucpmap_get(map, 0x10ffff); |
| LocalPointer<MutableCodePointTrie> mutableTrie( |
| new MutableCodePointTrie(initialValue, errorValue, errorCode), |
| errorCode); |
| if (U_FAILURE(errorCode)) { |
| return nullptr; |
| } |
| UChar32 start = 0, end; |
| uint32_t value; |
| while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0, |
| nullptr, nullptr, &value)) >= 0) { |
| if (value != initialValue) { |
| if (start == end) { |
| mutableTrie->set(start, value, errorCode); |
| } else { |
| mutableTrie->setRange(start, end, value, errorCode); |
| } |
| } |
| start = end + 1; |
| } |
| if (U_SUCCESS(errorCode)) { |
| return mutableTrie.orphan(); |
| } else { |
| return nullptr; |
| } |
| } |
| |
| MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) { |
| // Use the highValue as the initialValue to reduce the highStart. |
| uint32_t errorValue; |
| uint32_t initialValue; |
| switch (trie->valueWidth) { |
| case UCPTRIE_VALUE_BITS_16: |
| errorValue = trie->data.ptr16[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; |
| initialValue = trie->data.ptr16[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; |
| break; |
| case UCPTRIE_VALUE_BITS_32: |
| errorValue = trie->data.ptr32[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; |
| initialValue = trie->data.ptr32[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; |
| break; |
| case UCPTRIE_VALUE_BITS_8: |
| errorValue = trie->data.ptr8[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; |
| initialValue = trie->data.ptr8[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; |
| break; |
| default: |
| // Unreachable if the trie is properly initialized. |
| errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| return nullptr; |
| } |
| LocalPointer<MutableCodePointTrie> mutableTrie( |
| new MutableCodePointTrie(initialValue, errorValue, errorCode), |
| errorCode); |
| if (U_FAILURE(errorCode)) { |
| return nullptr; |
| } |
| UChar32 start = 0, end; |
| uint32_t value; |
| while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0, |
| nullptr, nullptr, &value)) >= 0) { |
| if (value != initialValue) { |
| if (start == end) { |
| mutableTrie->set(start, value, errorCode); |
| } else { |
| mutableTrie->setRange(start, end, value, errorCode); |
| } |
| } |
| start = end + 1; |
| } |
| if (U_SUCCESS(errorCode)) { |
| return mutableTrie.orphan(); |
| } else { |
| return nullptr; |
| } |
| } |
| |
| void MutableCodePointTrie::clear() { |
| index3NullOffset = dataNullOffset = -1; |
| dataLength = 0; |
| highValue = initialValue = origInitialValue; |
| highStart = 0; |
| uprv_free(index16); |
| index16 = nullptr; |
| } |
| |
| uint32_t MutableCodePointTrie::get(UChar32 c) const { |
| if ((uint32_t)c > MAX_UNICODE) { |
| return errorValue; |
| } |
| if (c >= highStart) { |
| return highValue; |
| } |
| int32_t i = c >> UCPTRIE_SHIFT_3; |
| if (flags[i] == ALL_SAME) { |
| return index[i]; |
| } else { |
| return data[index[i] + (c & UCPTRIE_SMALL_DATA_MASK)]; |
| } |
| } |
| |
| inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue, |
| UCPMapValueFilter *filter, const void *context) { |
| if (value == initialValue) { |
| value = nullValue; |
| } else if (filter != nullptr) { |
| value = filter(context, value); |
| } |
| return value; |
| } |
| |
| UChar32 MutableCodePointTrie::getRange( |
| UChar32 start, UCPMapValueFilter *filter, const void *context, |
| uint32_t *pValue) const { |
| if ((uint32_t)start > MAX_UNICODE) { |
| return U_SENTINEL; |
| } |
| if (start >= highStart) { |
| if (pValue != nullptr) { |
| uint32_t value = highValue; |
| if (filter != nullptr) { value = filter(context, value); } |
| *pValue = value; |
| } |
| return MAX_UNICODE; |
| } |
| uint32_t nullValue = initialValue; |
| if (filter != nullptr) { nullValue = filter(context, nullValue); } |
| UChar32 c = start; |
| uint32_t trieValue, value; |
| bool haveValue = false; |
| int32_t i = c >> UCPTRIE_SHIFT_3; |
| do { |
| if (flags[i] == ALL_SAME) { |
| uint32_t trieValue2 = index[i]; |
| if (haveValue) { |
| if (trieValue2 != trieValue) { |
| if (filter == nullptr || |
| maybeFilterValue(trieValue2, initialValue, nullValue, |
| filter, context) != value) { |
| return c - 1; |
| } |
| trieValue = trieValue2; // may or may not help |
| } |
| } else { |
| trieValue = trieValue2; |
| value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context); |
| if (pValue != nullptr) { *pValue = value; } |
| haveValue = true; |
| } |
| c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK; |
| } else /* MIXED */ { |
| int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK); |
| uint32_t trieValue2 = data[di]; |
| if (haveValue) { |
| if (trieValue2 != trieValue) { |
| if (filter == nullptr || |
| maybeFilterValue(trieValue2, initialValue, nullValue, |
| filter, context) != value) { |
| return c - 1; |
| } |
| trieValue = trieValue2; // may or may not help |
| } |
| } else { |
| trieValue = trieValue2; |
| value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context); |
| if (pValue != nullptr) { *pValue = value; } |
| haveValue = true; |
| } |
| while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) { |
| trieValue2 = data[++di]; |
| if (trieValue2 != trieValue) { |
| if (filter == nullptr || |
| maybeFilterValue(trieValue2, initialValue, nullValue, |
| filter, context) != value) { |
| return c - 1; |
| } |
| } |
| trieValue = trieValue2; // may or may not help |
| } |
| } |
| ++i; |
| } while (c < highStart); |
| U_ASSERT(haveValue); |
| if (maybeFilterValue(highValue, initialValue, nullValue, |
| filter, context) != value) { |
| return c - 1; |
| } else { |
| return MAX_UNICODE; |
| } |
| } |
| |
| void |
| writeBlock(uint32_t *block, uint32_t value) { |
| uint32_t *limit = block + UCPTRIE_SMALL_DATA_BLOCK_LENGTH; |
| while (block < limit) { |
| *block++ = value; |
| } |
| } |
| |
| bool MutableCodePointTrie::ensureHighStart(UChar32 c) { |
| if (c >= highStart) { |
| // Round up to a UCPTRIE_CP_PER_INDEX_2_ENTRY boundary to simplify compaction. |
| c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); |
| int32_t i = highStart >> UCPTRIE_SHIFT_3; |
| int32_t iLimit = c >> UCPTRIE_SHIFT_3; |
| if (iLimit > indexCapacity) { |
| uint32_t *newIndex = (uint32_t *)uprv_malloc(I_LIMIT * 4); |
| if (newIndex == nullptr) { return false; } |
| uprv_memcpy(newIndex, index, i * 4); |
| uprv_free(index); |
| index = newIndex; |
| indexCapacity = I_LIMIT; |
| } |
| do { |
| flags[i] = ALL_SAME; |
| index[i] = initialValue; |
| } while(++i < iLimit); |
| highStart = c; |
| } |
| return true; |
| } |
| |
| int32_t MutableCodePointTrie::allocDataBlock(int32_t blockLength) { |
| int32_t newBlock = dataLength; |
| int32_t newTop = newBlock + blockLength; |
| if (newTop > dataCapacity) { |
| int32_t capacity; |
| if (dataCapacity < MEDIUM_DATA_LENGTH) { |
| capacity = MEDIUM_DATA_LENGTH; |
| } else if (dataCapacity < MAX_DATA_LENGTH) { |
| capacity = MAX_DATA_LENGTH; |
| } else { |
| // Should never occur. |
| // Either MAX_DATA_LENGTH is incorrect, |
| // or the code writes more values than should be possible. |
| return -1; |
| } |
| uint32_t *newData = (uint32_t *)uprv_malloc(capacity * 4); |
| if (newData == nullptr) { |
| return -1; |
| } |
| uprv_memcpy(newData, data, (size_t)dataLength * 4); |
| uprv_free(data); |
| data = newData; |
| dataCapacity = capacity; |
| } |
| dataLength = newTop; |
| return newBlock; |
| } |
| |
| /** |
| * No error checking for illegal arguments. |
| * |
| * @return -1 if no new data block available (out of memory in data array) |
| * @internal |
| */ |
| int32_t MutableCodePointTrie::getDataBlock(int32_t i) { |
| if (flags[i] == MIXED) { |
| return index[i]; |
| } |
| if (i < BMP_I_LIMIT) { |
| int32_t newBlock = allocDataBlock(UCPTRIE_FAST_DATA_BLOCK_LENGTH); |
| if (newBlock < 0) { return newBlock; } |
| int32_t iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1); |
| int32_t iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK; |
| do { |
| U_ASSERT(flags[iStart] == ALL_SAME); |
| writeBlock(data + newBlock, index[iStart]); |
| flags[iStart] = MIXED; |
| index[iStart++] = newBlock; |
| newBlock += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; |
| } while (iStart < iLimit); |
| return index[i]; |
| } else { |
| int32_t newBlock = allocDataBlock(UCPTRIE_SMALL_DATA_BLOCK_LENGTH); |
| if (newBlock < 0) { return newBlock; } |
| writeBlock(data + newBlock, index[i]); |
| flags[i] = MIXED; |
| index[i] = newBlock; |
| return newBlock; |
| } |
| } |
| |
| void MutableCodePointTrie::set(UChar32 c, uint32_t value, UErrorCode &errorCode) { |
| if (U_FAILURE(errorCode)) { |
| return; |
| } |
| if ((uint32_t)c > MAX_UNICODE) { |
| errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| return; |
| } |
| |
| int32_t block; |
| if (!ensureHighStart(c) || (block = getDataBlock(c >> UCPTRIE_SHIFT_3)) < 0) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return; |
| } |
| |
| data[block + (c & UCPTRIE_SMALL_DATA_MASK)] = value; |
| } |
| |
| void |
| fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value) { |
| uint32_t *pLimit = block + limit; |
| block += start; |
| while (block < pLimit) { |
| *block++ = value; |
| } |
| } |
| |
| void MutableCodePointTrie::setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode) { |
| if (U_FAILURE(errorCode)) { |
| return; |
| } |
| if ((uint32_t)start > MAX_UNICODE || (uint32_t)end > MAX_UNICODE || start > end) { |
| errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| return; |
| } |
| if (!ensureHighStart(end)) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return; |
| } |
| |
| UChar32 limit = end + 1; |
| if (start & UCPTRIE_SMALL_DATA_MASK) { |
| // Set partial block at [start..following block boundary[. |
| int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3); |
| if (block < 0) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return; |
| } |
| |
| UChar32 nextStart = (start + UCPTRIE_SMALL_DATA_MASK) & ~UCPTRIE_SMALL_DATA_MASK; |
| if (nextStart <= limit) { |
| fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, |
| value); |
| start = nextStart; |
| } else { |
| fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, limit & UCPTRIE_SMALL_DATA_MASK, |
| value); |
| return; |
| } |
| } |
| |
| // Number of positions in the last, partial block. |
| int32_t rest = limit & UCPTRIE_SMALL_DATA_MASK; |
| |
| // Round down limit to a block boundary. |
| limit &= ~UCPTRIE_SMALL_DATA_MASK; |
| |
| // Iterate over all-value blocks. |
| while (start < limit) { |
| int32_t i = start >> UCPTRIE_SHIFT_3; |
| if (flags[i] == ALL_SAME) { |
| index[i] = value; |
| } else /* MIXED */ { |
| fillBlock(data + index[i], 0, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, value); |
| } |
| start += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; |
| } |
| |
| if (rest > 0) { |
| // Set partial block at [last block boundary..limit[. |
| int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3); |
| if (block < 0) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return; |
| } |
| |
| fillBlock(data + block, 0, rest, value); |
| } |
| } |
| |
| /* compaction --------------------------------------------------------------- */ |
| |
| void MutableCodePointTrie::maskValues(uint32_t mask) { |
| initialValue &= mask; |
| errorValue &= mask; |
| highValue &= mask; |
| int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; |
| for (int32_t i = 0; i < iLimit; ++i) { |
| if (flags[i] == ALL_SAME) { |
| index[i] &= mask; |
| } |
| } |
| for (int32_t i = 0; i < dataLength; ++i) { |
| data[i] &= mask; |
| } |
| } |
| |
| template<typename UIntA, typename UIntB> |
| bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) { |
| while (length > 0 && *s == *t) { |
| ++s; |
| ++t; |
| --length; |
| } |
| return length == 0; |
| } |
| |
| bool allValuesSameAs(const uint32_t *p, int32_t length, uint32_t value) { |
| const uint32_t *pLimit = p + length; |
| while (p < pLimit && *p == value) { ++p; } |
| return p == pLimit; |
| } |
| |
| /** Search for an identical block. */ |
| int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length, |
| const uint16_t *q, int32_t qStart, int32_t blockLength) { |
| // Ensure that we do not even partially get past length. |
| length -= blockLength; |
| |
| q += qStart; |
| while (pStart <= length) { |
| if (equalBlocks(p + pStart, q, blockLength)) { |
| return pStart; |
| } |
| ++pStart; |
| } |
| return -1; |
| } |
| |
| int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit, |
| uint32_t value, int32_t blockLength) { |
| // Ensure that we do not even partially get past limit. |
| limit -= blockLength; |
| |
| for (int32_t block = start; block <= limit; ++block) { |
| if (p[block] == value) { |
| for (int32_t i = 1;; ++i) { |
| if (i == blockLength) { |
| return block; |
| } |
| if (p[block + i] != value) { |
| block += i; |
| break; |
| } |
| } |
| } |
| } |
| return -1; |
| } |
| |
| /** |
| * Look for maximum overlap of the beginning of the other block |
| * with the previous, adjacent block. |
| */ |
| template<typename UIntA, typename UIntB> |
| int32_t getOverlap(const UIntA *p, int32_t length, |
| const UIntB *q, int32_t qStart, int32_t blockLength) { |
| int32_t overlap = blockLength - 1; |
| U_ASSERT(overlap <= length); |
| q += qStart; |
| while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) { |
| --overlap; |
| } |
| return overlap; |
| } |
| |
| int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value, |
| int32_t blockLength) { |
| int32_t min = length - (blockLength - 1); |
| int32_t i = length; |
| while (min < i && p[i - 1] == value) { --i; } |
| return length - i; |
| } |
| |
| bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) { |
| for (int32_t i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) { |
| if (index[i] == dataOffset) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Finds the start of the last range in the trie by enumerating backward. |
| * Indexes for code points higher than this will be omitted. |
| */ |
| UChar32 MutableCodePointTrie::findHighStart() const { |
| int32_t i = highStart >> UCPTRIE_SHIFT_3; |
| while (i > 0) { |
| bool match; |
| if (flags[--i] == ALL_SAME) { |
| match = index[i] == highValue; |
| } else /* MIXED */ { |
| const uint32_t *p = data + index[i]; |
| for (int32_t j = 0;; ++j) { |
| if (j == UCPTRIE_SMALL_DATA_BLOCK_LENGTH) { |
| match = true; |
| break; |
| } |
| if (p[j] != highValue) { |
| match = false; |
| break; |
| } |
| } |
| } |
| if (!match) { |
| return (i + 1) << UCPTRIE_SHIFT_3; |
| } |
| } |
| return 0; |
| } |
| |
| class AllSameBlocks { |
| public: |
| static constexpr int32_t NEW_UNIQUE = -1; |
| static constexpr int32_t OVERFLOW = -2; |
| |
| AllSameBlocks() : length(0), mostRecent(-1) {} |
| |
| int32_t findOrAdd(int32_t index, int32_t count, uint32_t value) { |
| if (mostRecent >= 0 && values[mostRecent] == value) { |
| refCounts[mostRecent] += count; |
| return indexes[mostRecent]; |
| } |
| for (int32_t i = 0; i < length; ++i) { |
| if (values[i] == value) { |
| mostRecent = i; |
| refCounts[i] += count; |
| return indexes[i]; |
| } |
| } |
| if (length == CAPACITY) { |
| return OVERFLOW; |
| } |
| mostRecent = length; |
| indexes[length] = index; |
| values[length] = value; |
| refCounts[length++] = count; |
| return NEW_UNIQUE; |
| } |
| |
| /** Replaces the block which has the lowest reference count. */ |
| void add(int32_t index, int32_t count, uint32_t value) { |
| U_ASSERT(length == CAPACITY); |
| int32_t least = -1; |
| int32_t leastCount = I_LIMIT; |
| for (int32_t i = 0; i < length; ++i) { |
| U_ASSERT(values[i] != value); |
| if (refCounts[i] < leastCount) { |
| least = i; |
| leastCount = refCounts[i]; |
| } |
| } |
| U_ASSERT(least >= 0); |
| mostRecent = least; |
| indexes[least] = index; |
| values[least] = value; |
| refCounts[least] = count; |
| } |
| |
| int32_t findMostUsed() const { |
| if (length == 0) { return -1; } |
| int32_t max = -1; |
| int32_t maxCount = 0; |
| for (int32_t i = 0; i < length; ++i) { |
| if (refCounts[i] > maxCount) { |
| max = i; |
| maxCount = refCounts[i]; |
| } |
| } |
| return indexes[max]; |
| } |
| |
| private: |
| static constexpr int32_t CAPACITY = 32; |
| |
| int32_t length; |
| int32_t mostRecent; |
| |
| int32_t indexes[CAPACITY]; |
| uint32_t values[CAPACITY]; |
| int32_t refCounts[CAPACITY]; |
| }; |
| |
| // Custom hash table for mixed-value blocks to be found anywhere in the |
| // compacted data or index so far. |
| class MixedBlocks { |
| public: |
| MixedBlocks() {} |
| ~MixedBlocks() { |
| uprv_free(table); |
| } |
| |
| bool init(int32_t maxLength, int32_t newBlockLength) { |
| // We store actual data indexes + 1 to reserve 0 for empty entries. |
| int32_t maxDataIndex = maxLength - newBlockLength + 1; |
| int32_t newLength; |
| if (maxDataIndex <= 0xfff) { // 4k |
| newLength = 6007; |
| shift = 12; |
| mask = 0xfff; |
| } else if (maxDataIndex <= 0x7fff) { // 32k |
| newLength = 50021; |
| shift = 15; |
| mask = 0x7fff; |
| } else if (maxDataIndex <= 0x1ffff) { // 128k |
| newLength = 200003; |
| shift = 17; |
| mask = 0x1ffff; |
| } else { |
| // maxDataIndex up to around MAX_DATA_LENGTH, ca. 1.1M |
| newLength = 1500007; |
| shift = 21; |
| mask = 0x1fffff; |
| } |
| if (newLength > capacity) { |
| uprv_free(table); |
| table = (uint32_t *)uprv_malloc(newLength * 4); |
| if (table == nullptr) { |
| return false; |
| } |
| capacity = newLength; |
| } |
| length = newLength; |
| uprv_memset(table, 0, length * 4); |
| |
| blockLength = newBlockLength; |
| return true; |
| } |
| |
| template<typename UInt> |
| void extend(const UInt *data, int32_t minStart, int32_t prevDataLength, int32_t newDataLength) { |
| int32_t start = prevDataLength - blockLength; |
| if (start >= minStart) { |
| ++start; // Skip the last block that we added last time. |
| } else { |
| start = minStart; // Begin with the first full block. |
| } |
| for (int32_t end = newDataLength - blockLength; start <= end; ++start) { |
| uint32_t hashCode = makeHashCode(data, start); |
| addEntry(data, start, hashCode, start); |
| } |
| } |
| |
| template<typename UIntA, typename UIntB> |
| int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const { |
| uint32_t hashCode = makeHashCode(blockData, blockStart); |
| int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode); |
| if (entryIndex >= 0) { |
| return (table[entryIndex] & mask) - 1; |
| } else { |
| return -1; |
| } |
| } |
| |
| int32_t findAllSameBlock(const uint32_t *data, uint32_t blockValue) const { |
| uint32_t hashCode = makeHashCode(blockValue); |
| int32_t entryIndex = findEntry(data, blockValue, hashCode); |
| if (entryIndex >= 0) { |
| return (table[entryIndex] & mask) - 1; |
| } else { |
| return -1; |
| } |
| } |
| |
| private: |
| template<typename UInt> |
| uint32_t makeHashCode(const UInt *blockData, int32_t blockStart) const { |
| int32_t blockLimit = blockStart + blockLength; |
| uint32_t hashCode = blockData[blockStart++]; |
| do { |
| hashCode = 37 * hashCode + blockData[blockStart++]; |
| } while (blockStart < blockLimit); |
| return hashCode; |
| } |
| |
| uint32_t makeHashCode(uint32_t blockValue) const { |
| uint32_t hashCode = blockValue; |
| for (int32_t i = 1; i < blockLength; ++i) { |
| hashCode = 37 * hashCode + blockValue; |
| } |
| return hashCode; |
| } |
| |
| template<typename UInt> |
| void addEntry(const UInt *data, int32_t blockStart, uint32_t hashCode, int32_t dataIndex) { |
| U_ASSERT(0 <= dataIndex && dataIndex < (int32_t)mask); |
| int32_t entryIndex = findEntry(data, data, blockStart, hashCode); |
| if (entryIndex < 0) { |
| table[~entryIndex] = (hashCode << shift) | (dataIndex + 1); |
| } |
| } |
| |
| template<typename UIntA, typename UIntB> |
| int32_t findEntry(const UIntA *data, const UIntB *blockData, int32_t blockStart, |
| uint32_t hashCode) const { |
| uint32_t shiftedHashCode = hashCode << shift; |
| int32_t initialEntryIndex = (hashCode % (length - 1)) + 1; // 1..length-1 |
| for (int32_t entryIndex = initialEntryIndex;;) { |
| uint32_t entry = table[entryIndex]; |
| if (entry == 0) { |
| return ~entryIndex; |
| } |
| if ((entry & ~mask) == shiftedHashCode) { |
| int32_t dataIndex = (entry & mask) - 1; |
| if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) { |
| return entryIndex; |
| } |
| } |
| entryIndex = nextIndex(initialEntryIndex, entryIndex); |
| } |
| } |
| |
| int32_t findEntry(const uint32_t *data, uint32_t blockValue, uint32_t hashCode) const { |
| uint32_t shiftedHashCode = hashCode << shift; |
| int32_t initialEntryIndex = (hashCode % (length - 1)) + 1; // 1..length-1 |
| for (int32_t entryIndex = initialEntryIndex;;) { |
| uint32_t entry = table[entryIndex]; |
| if (entry == 0) { |
| return ~entryIndex; |
| } |
| if ((entry & ~mask) == shiftedHashCode) { |
| int32_t dataIndex = (entry & mask) - 1; |
| if (allValuesSameAs(data + dataIndex, blockLength, blockValue)) { |
| return entryIndex; |
| } |
| } |
| entryIndex = nextIndex(initialEntryIndex, entryIndex); |
| } |
| } |
| |
| inline int32_t nextIndex(int32_t initialEntryIndex, int32_t entryIndex) const { |
| // U_ASSERT(0 < initialEntryIndex && initialEntryIndex < length); |
| return (entryIndex + initialEntryIndex) % length; |
| } |
| |
| // Hash table. |
| // The length is a prime number, larger than the maximum data length. |
| // The "shift" lower bits store a data index + 1. |
| // The remaining upper bits store a partial hashCode of the block data values. |
| uint32_t *table = nullptr; |
| int32_t capacity = 0; |
| int32_t length = 0; |
| int32_t shift = 0; |
| uint32_t mask = 0; |
| |
| int32_t blockLength = 0; |
| }; |
| |
| int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) { |
| #ifdef UCPTRIE_DEBUG |
| bool overflow = false; |
| #endif |
| |
| // ASCII data will be stored as a linear table, even if the following code |
| // does not yet count it that way. |
| int32_t newDataCapacity = ASCII_LIMIT; |
| // Add room for a small data null block in case it would match the start of |
| // a fast data block where dataNullOffset must not be set in that case. |
| newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; |
| // Add room for special values (errorValue, highValue) and padding. |
| newDataCapacity += 4; |
| int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; |
| int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; |
| int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; |
| for (int32_t i = 0; i < iLimit; i += inc) { |
| if (i == fastILimit) { |
| blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; |
| inc = 1; |
| } |
| uint32_t value = index[i]; |
| if (flags[i] == MIXED) { |
| // Really mixed? |
| const uint32_t *p = data + value; |
| value = *p; |
| if (allValuesSameAs(p + 1, blockLength - 1, value)) { |
| flags[i] = ALL_SAME; |
| index[i] = value; |
| // Fall through to ALL_SAME handling. |
| } else { |
| newDataCapacity += blockLength; |
| continue; |
| } |
| } else { |
| U_ASSERT(flags[i] == ALL_SAME); |
| if (inc > 1) { |
| // Do all of the fast-range data block's ALL_SAME parts have the same value? |
| bool allSame = true; |
| int32_t next_i = i + inc; |
| for (int32_t j = i + 1; j < next_i; ++j) { |
| U_ASSERT(flags[j] == ALL_SAME); |
| if (index[j] != value) { |
| allSame = false; |
| break; |
| } |
| } |
| if (!allSame) { |
| // Turn it into a MIXED block. |
| if (getDataBlock(i) < 0) { |
| return -1; |
| } |
| newDataCapacity += blockLength; |
| continue; |
| } |
| } |
| } |
| // Is there another ALL_SAME block with the same value? |
| int32_t other = allSameBlocks.findOrAdd(i, inc, value); |
| if (other == AllSameBlocks::OVERFLOW) { |
| // The fixed-size array overflowed. Slow check for a duplicate block. |
| #ifdef UCPTRIE_DEBUG |
| if (!overflow) { |
| puts("UCPTrie AllSameBlocks overflow"); |
| overflow = true; |
| } |
| #endif |
| int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; |
| for (int32_t j = 0;; j += jInc) { |
| if (j == i) { |
| allSameBlocks.add(i, inc, value); |
| break; |
| } |
| if (j == fastILimit) { |
| jInc = 1; |
| } |
| if (flags[j] == ALL_SAME && index[j] == value) { |
| allSameBlocks.add(j, jInc + inc, value); |
| other = j; |
| break; |
| // We could keep counting blocks with the same value |
| // before we add the first one, which may improve compaction in rare cases, |
| // but it would make it slower. |
| } |
| } |
| } |
| if (other >= 0) { |
| flags[i] = SAME_AS; |
| index[i] = other; |
| } else { |
| // New unique same-value block. |
| newDataCapacity += blockLength; |
| } |
| } |
| return newDataCapacity; |
| } |
| |
| #ifdef UCPTRIE_DEBUG |
| # define DEBUG_DO(expr) expr |
| #else |
| # define DEBUG_DO(expr) |
| #endif |
| |
| #ifdef UCPTRIE_DEBUG |
| // Braille symbols: U+28xx = UTF-8 E2 A0 80..E2 A3 BF |
| int32_t appendValue(char s[], int32_t length, uint32_t value) { |
| value ^= value >> 16; |
| value ^= value >> 8; |
| s[length] = 0xE2; |
| s[length + 1] = (char)(0xA0 + ((value >> 6) & 3)); |
| s[length + 2] = (char)(0x80 + (value & 0x3F)); |
| return length + 3; |
| } |
| |
| void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value, |
| UChar32 start, int32_t overlap, uint32_t initialValue) { |
| char s[UCPTRIE_FAST_DATA_BLOCK_LENGTH * 3 + 3]; |
| int32_t length = 0; |
| int32_t i; |
| for (i = 0; i < overlap; ++i) { |
| length = appendValue(s, length, 0); // Braille blank |
| } |
| s[length++] = '|'; |
| for (; i < blockLength; ++i) { |
| if (block != nullptr) { |
| value = block[i]; |
| } |
| if (value == initialValue) { |
| value = 0x40; // Braille lower left dot |
| } |
| length = appendValue(s, length, value); |
| } |
| s[length] = 0; |
| start += overlap; |
| if (start <= 0xffff) { |
| printf(" %04lX %s|\n", (long)start, s); |
| } else if (start <= 0xfffff) { |
| printf(" %5lX %s|\n", (long)start, s); |
| } else { |
| printf(" %6lX %s|\n", (long)start, s); |
| } |
| } |
| #endif |
| |
| /** |
| * Compacts a build-time trie. |
| * |
| * The compaction |
| * - removes blocks that are identical with earlier ones |
| * - overlaps each new non-duplicate block as much as possible with the previously-written one |
| * - works with fast-range data blocks whose length is a multiple of that of |
| * higher-code-point data blocks |
| * |
| * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks. |
| */ |
| int32_t MutableCodePointTrie::compactData( |
| int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity, |
| int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode) { |
| #ifdef UCPTRIE_DEBUG |
| int32_t countSame=0, sumOverlaps=0; |
| bool printData = dataLength == 29088 /* line.brk */ || |
| // dataLength == 30048 /* CanonIterData */ || |
| dataLength == 50400 /* zh.txt~stroke */; |
| #endif |
| |
| // The linear ASCII data has been copied into newData already. |
| int32_t newDataLength = 0; |
| for (int32_t i = 0; newDataLength < ASCII_LIMIT; |
| newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) { |
| index[i] = newDataLength; |
| #ifdef UCPTRIE_DEBUG |
| if (printData) { |
| printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue); |
| } |
| #endif |
| } |
| |
| int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; |
| if (!mixedBlocks.init(newDataCapacity, blockLength)) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return 0; |
| } |
| mixedBlocks.extend(newData, 0, 0, newDataLength); |
| |
| int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; |
| int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; |
| int32_t fastLength = 0; |
| for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) { |
| if (i == fastILimit) { |
| blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; |
| inc = 1; |
| fastLength = newDataLength; |
| if (!mixedBlocks.init(newDataCapacity, blockLength)) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return 0; |
| } |
| mixedBlocks.extend(newData, 0, 0, newDataLength); |
| } |
| if (flags[i] == ALL_SAME) { |
| uint32_t value = index[i]; |
| // Find an earlier part of the data array of length blockLength |
| // that is filled with this value. |
| int32_t n = mixedBlocks.findAllSameBlock(newData, value); |
| // If we find a match, and the current block is the data null block, |
| // and it is not a fast block but matches the start of a fast block, |
| // then we need to continue looking. |
| // This is because this small block is shorter than the fast block, |
| // and not all of the rest of the fast block is filled with this value. |
| // Otherwise trie.getRange() would detect that the fast block starts at |
| // dataNullOffset and assume incorrectly that it is filled with the null value. |
| while (n >= 0 && i == dataNullIndex && i >= fastILimit && n < fastLength && |
| isStartOfSomeFastBlock(n, index, fastILimit)) { |
| n = findAllSameBlock(newData, n + 1, newDataLength, value, blockLength); |
| } |
| if (n >= 0) { |
| DEBUG_DO(++countSame); |
| index[i] = n; |
| } else { |
| n = getAllSameOverlap(newData, newDataLength, value, blockLength); |
| DEBUG_DO(sumOverlaps += n); |
| #ifdef UCPTRIE_DEBUG |
| if (printData) { |
| printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue); |
| } |
| #endif |
| index[i] = newDataLength - n; |
| int32_t prevDataLength = newDataLength; |
| while (n < blockLength) { |
| newData[newDataLength++] = value; |
| ++n; |
| } |
| mixedBlocks.extend(newData, 0, prevDataLength, newDataLength); |
| } |
| } else if (flags[i] == MIXED) { |
| const uint32_t *block = data + index[i]; |
| int32_t n = mixedBlocks.findBlock(newData, block, 0); |
| if (n >= 0) { |
| DEBUG_DO(++countSame); |
| index[i] = n; |
| } else { |
| n = getOverlap(newData, newDataLength, block, 0, blockLength); |
| DEBUG_DO(sumOverlaps += n); |
| #ifdef UCPTRIE_DEBUG |
| if (printData) { |
| printBlock(block, blockLength, 0, i << UCPTRIE_SHIFT_3, n, initialValue); |
| } |
| #endif |
| index[i] = newDataLength - n; |
| int32_t prevDataLength = newDataLength; |
| while (n < blockLength) { |
| newData[newDataLength++] = block[n++]; |
| } |
| mixedBlocks.extend(newData, 0, prevDataLength, newDataLength); |
| } |
| } else /* SAME_AS */ { |
| uint32_t j = index[i]; |
| index[i] = index[j]; |
| } |
| } |
| |
| #ifdef UCPTRIE_DEBUG |
| /* we saved some space */ |
| printf("compacting UCPTrie: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n", |
| (long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps); |
| #endif |
| return newDataLength; |
| } |
| |
| int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, |
| UErrorCode &errorCode) { |
| int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3); |
| if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) { |
| // Only the linear fast index, no multi-stage index tables. |
| index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET; |
| return fastIndexLength; |
| } |
| |
| // Condense the fast index table. |
| // Also, does it contain an index-3 block with all dataNullOffset? |
| uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH]; // fastIndexLength |
| int32_t i3FirstNull = -1; |
| for (int32_t i = 0, j = 0; i < fastILimit; ++j) { |
| uint32_t i3 = index[i]; |
| fastIndex[j] = (uint16_t)i3; |
| if (i3 == (uint32_t)dataNullOffset) { |
| if (i3FirstNull < 0) { |
| i3FirstNull = j; |
| } else if (index3NullOffset < 0 && |
| (j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) { |
| index3NullOffset = i3FirstNull; |
| } |
| } else { |
| i3FirstNull = -1; |
| } |
| // Set the index entries that compactData() skipped. |
| // Needed when the multi-stage index covers the fast index range as well. |
| int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK; |
| while (++i < iNext) { |
| i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; |
| index[i] = i3; |
| } |
| } |
| |
| if (!mixedBlocks.init(fastIndexLength, UCPTRIE_INDEX_3_BLOCK_LENGTH)) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return 0; |
| } |
| mixedBlocks.extend(fastIndex, 0, 0, fastIndexLength); |
| |
| // Examine index-3 blocks. For each determine one of: |
| // - same as the index-3 null block |
| // - same as a fast-index block |
| // - 16-bit indexes |
| // - 18-bit indexes |
| // We store this in the first flags entry for the index-3 block. |
| // |
| // Also determine an upper limit for the index-3 table length. |
| int32_t index3Capacity = 0; |
| i3FirstNull = index3NullOffset; |
| bool hasLongI3Blocks = false; |
| // If the fast index covers the whole BMP, then |
| // the multi-stage index is only for supplementary code points. |
| // Otherwise, the multi-stage index covers all of Unicode. |
| int32_t iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT; |
| int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; |
| for (int32_t i = iStart; i < iLimit;) { |
| int32_t j = i; |
| int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH; |
| uint32_t oredI3 = 0; |
| bool isNull = true; |
| do { |
| uint32_t i3 = index[j]; |
| oredI3 |= i3; |
| if (i3 != (uint32_t)dataNullOffset) { |
| isNull = false; |
| } |
| } while (++j < jLimit); |
| if (isNull) { |
| flags[i] = I3_NULL; |
| if (i3FirstNull < 0) { |
| if (oredI3 <= 0xffff) { |
| index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH; |
| } else { |
| index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH; |
| hasLongI3Blocks = true; |
| } |
| i3FirstNull = 0; |
| } |
| } else { |
| if (oredI3 <= 0xffff) { |
| int32_t n = mixedBlocks.findBlock(fastIndex, index, i); |
| if (n >= 0) { |
| flags[i] = I3_BMP; |
| index[i] = n; |
| } else { |
| flags[i] = I3_16; |
| index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH; |
| } |
| } else { |
| flags[i] = I3_18; |
| index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH; |
| hasLongI3Blocks = true; |
| } |
| } |
| i = j; |
| } |
| |
| int32_t index2Capacity = (iLimit - iStart) >> UCPTRIE_SHIFT_2_3; |
| |
| // Length of the index-1 table, rounded up. |
| int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2; |
| |
| // Index table: Fast index, index-1, index-3, index-2. |
| // +1 for possible index table padding. |
| int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1; |
| index16 = (uint16_t *)uprv_malloc(index16Capacity * 2); |
| if (index16 == nullptr) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return 0; |
| } |
| uprv_memcpy(index16, fastIndex, fastIndexLength * 2); |
| |
| if (!mixedBlocks.init(index16Capacity, UCPTRIE_INDEX_3_BLOCK_LENGTH)) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return 0; |
| } |
| MixedBlocks longI3Blocks; |
| if (hasLongI3Blocks) { |
| if (!longI3Blocks.init(index16Capacity, INDEX_3_18BIT_BLOCK_LENGTH)) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return 0; |
| } |
| } |
| |
| // Compact the index-3 table and write an uncompacted version of the index-2 table. |
| uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2]; // index2Capacity |
| int32_t i2Length = 0; |
| i3FirstNull = index3NullOffset; |
| int32_t index3Start = fastIndexLength + index1Length; |
| int32_t indexLength = index3Start; |
| for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) { |
| int32_t i3; |
| uint8_t f = flags[i]; |
| if (f == I3_NULL && i3FirstNull < 0) { |
| // First index-3 null block. Write & overlap it like a normal block, then remember it. |
| f = dataNullOffset <= 0xffff ? I3_16 : I3_18; |
| i3FirstNull = 0; |
| } |
| if (f == I3_NULL) { |
| i3 = index3NullOffset; |
| } else if (f == I3_BMP) { |
| i3 = index[i]; |
| } else if (f == I3_16) { |
| int32_t n = mixedBlocks.findBlock(index16, index, i); |
| if (n >= 0) { |
| i3 = n; |
| } else { |
| if (indexLength == index3Start) { |
| // No overlap at the boundary between the index-1 and index-3 tables. |
| n = 0; |
| } else { |
| n = getOverlap(index16, indexLength, |
| index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH); |
| } |
| i3 = indexLength - n; |
| int32_t prevIndexLength = indexLength; |
| while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) { |
| index16[indexLength++] = index[i + n++]; |
| } |
| mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength); |
| if (hasLongI3Blocks) { |
| longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength); |
| } |
| } |
| } else { |
| U_ASSERT(f == I3_18); |
| U_ASSERT(hasLongI3Blocks); |
| // Encode an index-3 block that contains one or more data indexes exceeding 16 bits. |
| int32_t j = i; |
| int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH; |
| int32_t k = indexLength; |
| do { |
| ++k; |
| uint32_t v = index[j++]; |
| uint32_t upperBits = (v & 0x30000) >> 2; |
| index16[k++] = v; |
| v = index[j++]; |
| upperBits |= (v & 0x30000) >> 4; |
| index16[k++] = v; |
| v = index[j++]; |
| upperBits |= (v & 0x30000) >> 6; |
| index16[k++] = v; |
| v = index[j++]; |
| upperBits |= (v & 0x30000) >> 8; |
| index16[k++] = v; |
| v = index[j++]; |
| upperBits |= (v & 0x30000) >> 10; |
| index16[k++] = v; |
| v = index[j++]; |
| upperBits |= (v & 0x30000) >> 12; |
| index16[k++] = v; |
| v = index[j++]; |
| upperBits |= (v & 0x30000) >> 14; |
| index16[k++] = v; |
| v = index[j++]; |
| upperBits |= (v & 0x30000) >> 16; |
| index16[k++] = v; |
| index16[k - 9] = upperBits; |
| } while (j < jLimit); |
| int32_t n = longI3Blocks.findBlock(index16, index16, indexLength); |
| if (n >= 0) { |
| i3 = n | 0x8000; |
| } else { |
| if (indexLength == index3Start) { |
| // No overlap at the boundary between the index-1 and index-3 tables. |
| n = 0; |
| } else { |
| n = getOverlap(index16, indexLength, |
| index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH); |
| } |
| i3 = (indexLength - n) | 0x8000; |
| int32_t prevIndexLength = indexLength; |
| if (n > 0) { |
| int32_t start = indexLength; |
| while (n < INDEX_3_18BIT_BLOCK_LENGTH) { |
| index16[indexLength++] = index16[start + n++]; |
| } |
| } else { |
| indexLength += INDEX_3_18BIT_BLOCK_LENGTH; |
| } |
| mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength); |
| if (hasLongI3Blocks) { |
| longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength); |
| } |
| } |
| } |
| if (index3NullOffset < 0 && i3FirstNull >= 0) { |
| index3NullOffset = i3; |
| } |
| // Set the index-2 table entry. |
| index2[i2Length++] = i3; |
| } |
| U_ASSERT(i2Length == index2Capacity); |
| U_ASSERT(indexLength <= index3Start + index3Capacity); |
| |
| if (index3NullOffset < 0) { |
| index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET; |
| } |
| if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) { |
| // The index-3 offsets exceed 15 bits, or |
| // the last one cannot be distinguished from the no-null-block value. |
| errorCode = U_INDEX_OUTOFBOUNDS_ERROR; |
| return 0; |
| } |
| |
| // Compact the index-2 table and write the index-1 table. |
| static_assert(UCPTRIE_INDEX_2_BLOCK_LENGTH == UCPTRIE_INDEX_3_BLOCK_LENGTH, |
| "must re-init mixedBlocks"); |
| int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH; |
| int32_t i1 = fastIndexLength; |
| for (int32_t i = 0; i < i2Length; i += blockLength) { |
| int32_t n; |
| if ((i2Length - i) >= blockLength) { |
| // normal block |
| U_ASSERT(blockLength == UCPTRIE_INDEX_2_BLOCK_LENGTH); |
| n = mixedBlocks.findBlock(index16, index2, i); |
| } else { |
| // highStart is inside the last index-2 block. Shorten it. |
| blockLength = i2Length - i; |
| n = findSameBlock(index16, index3Start, indexLength, |
| index2, i, blockLength); |
| } |
| int32_t i2; |
| if (n >= 0) { |
| i2 = n; |
| } else { |
| if (indexLength == index3Start) { |
| // No overlap at the boundary between the index-1 and index-3/2 tables. |
| n = 0; |
| } else { |
| n = getOverlap(index16, indexLength, index2, i, blockLength); |
| } |
| i2 = indexLength - n; |
| int32_t prevIndexLength = indexLength; |
| while (n < blockLength) { |
| index16[indexLength++] = index2[i + n++]; |
| } |
| mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength); |
| } |
| // Set the index-1 table entry. |
| index16[i1++] = i2; |
| } |
| U_ASSERT(i1 == index3Start); |
| U_ASSERT(indexLength <= index16Capacity); |
| |
| #ifdef UCPTRIE_DEBUG |
| /* we saved some space */ |
| printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n", |
| (long)iLimit, (long)indexLength); |
| #endif |
| |
| return indexLength; |
| } |
| |
| int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) { |
| // Find the real highStart and round it up. |
| U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0); |
| highValue = get(MAX_UNICODE); |
| int32_t realHighStart = findHighStart(); |
| realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) & |
| ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); |
| if (realHighStart == UNICODE_LIMIT) { |
| highValue = initialValue; |
| } |
| |
| #ifdef UCPTRIE_DEBUG |
| printf("UCPTrie: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n", |
| (long)realHighStart, (long)highValue, (long)initialValue); |
| #endif |
| |
| // We always store indexes and data values for the fast range. |
| // Pin highStart to the top of that range while building. |
| UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3; |
| if (realHighStart < fastLimit) { |
| for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) { |
| flags[i] = ALL_SAME; |
| index[i] = highValue; |
| } |
| highStart = fastLimit; |
| } else { |
| highStart = realHighStart; |
| } |
| |
| uint32_t asciiData[ASCII_LIMIT]; |
| for (int32_t i = 0; i < ASCII_LIMIT; ++i) { |
| asciiData[i] = get(i); |
| } |
| |
| // First we look for which data blocks have the same value repeated over the whole block, |
| // deduplicate such blocks, find a good null data block (for faster enumeration), |
| // and get an upper bound for the necessary data array length. |
| AllSameBlocks allSameBlocks; |
| int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks); |
| if (newDataCapacity < 0) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return 0; |
| } |
| uint32_t *newData = (uint32_t *)uprv_malloc(newDataCapacity * 4); |
| if (newData == nullptr) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return 0; |
| } |
| uprv_memcpy(newData, asciiData, sizeof(asciiData)); |
| |
| int32_t dataNullIndex = allSameBlocks.findMostUsed(); |
| |
| MixedBlocks mixedBlocks; |
| int32_t newDataLength = compactData(fastILimit, newData, newDataCapacity, |
| dataNullIndex, mixedBlocks, errorCode); |
| if (U_FAILURE(errorCode)) { return 0; } |
| U_ASSERT(newDataLength <= newDataCapacity); |
| uprv_free(data); |
| data = newData; |
| dataCapacity = newDataCapacity; |
| dataLength = newDataLength; |
| if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) { |
| // The offset of the last data block is too high to be stored in the index table. |
| errorCode = U_INDEX_OUTOFBOUNDS_ERROR; |
| return 0; |
| } |
| |
| if (dataNullIndex >= 0) { |
| dataNullOffset = index[dataNullIndex]; |
| #ifdef UCPTRIE_DEBUG |
| if (data[dataNullOffset] != initialValue) { |
| printf("UCPTrie initialValue %lx -> more common nullValue %lx\n", |
| (long)initialValue, (long)data[dataNullOffset]); |
| } |
| #endif |
| initialValue = data[dataNullOffset]; |
| } else { |
| dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET; |
| } |
| |
| int32_t indexLength = compactIndex(fastILimit, mixedBlocks, errorCode); |
| highStart = realHighStart; |
| return indexLength; |
| } |
| |
| UCPTrie *MutableCodePointTrie::build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) { |
| if (U_FAILURE(errorCode)) { |
| return nullptr; |
| } |
| if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type || |
| valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth) { |
| errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| return nullptr; |
| } |
| |
| // The mutable trie always stores 32-bit values. |
| // When we build a UCPTrie for a smaller value width, we first mask off unused bits |
| // before compacting the data. |
| switch (valueWidth) { |
| case UCPTRIE_VALUE_BITS_32: |
| break; |
| case UCPTRIE_VALUE_BITS_16: |
| maskValues(0xffff); |
| break; |
| case UCPTRIE_VALUE_BITS_8: |
| maskValues(0xff); |
| break; |
| default: |
| break; |
| } |
| |
| UChar32 fastLimit = type == UCPTRIE_TYPE_FAST ? BMP_LIMIT : UCPTRIE_SMALL_LIMIT; |
| int32_t indexLength = compactTrie(fastLimit >> UCPTRIE_SHIFT_3, errorCode); |
| if (U_FAILURE(errorCode)) { |
| clear(); |
| return nullptr; |
| } |
| |
| // Ensure data table alignment: The index length must be even for uint32_t data. |
| if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) { |
| index16[indexLength++] = 0xffee; // arbitrary value |
| } |
| |
| // Make the total trie structure length a multiple of 4 bytes by padding the data table, |
| // and store special values as the last two data values. |
| int32_t length = indexLength * 2; |
| if (valueWidth == UCPTRIE_VALUE_BITS_16) { |
| if (((indexLength ^ dataLength) & 1) != 0) { |
| // padding |
| data[dataLength++] = errorValue; |
| } |
| if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { |
| data[dataLength++] = highValue; |
| data[dataLength++] = errorValue; |
| } |
| length += dataLength * 2; |
| } else if (valueWidth == UCPTRIE_VALUE_BITS_32) { |
| // 32-bit data words never need padding to a multiple of 4 bytes. |
| if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { |
| if (data[dataLength - 1] != highValue) { |
| data[dataLength++] = highValue; |
| } |
| data[dataLength++] = errorValue; |
| } |
| length += dataLength * 4; |
| } else { |
| int32_t and3 = (length + dataLength) & 3; |
| if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) { |
| // all set |
| } else if(and3 == 3 && data[dataLength - 1] == highValue) { |
| data[dataLength++] = errorValue; |
| } else { |
| while (and3 != 2) { |
| data[dataLength++] = highValue; |
| and3 = (and3 + 1) & 3; |
| } |
| data[dataLength++] = highValue; |
| data[dataLength++] = errorValue; |
| } |
| length += dataLength; |
| } |
| |
| // Calculate the total length of the UCPTrie as a single memory block. |
| length += sizeof(UCPTrie); |
| U_ASSERT((length & 3) == 0); |
| |
| uint8_t *bytes = (uint8_t *)uprv_malloc(length); |
| if (bytes == nullptr) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| clear(); |
| return nullptr; |
| } |
| UCPTrie *trie = reinterpret_cast<UCPTrie *>(bytes); |
| uprv_memset(trie, 0, sizeof(UCPTrie)); |
| trie->indexLength = indexLength; |
| trie->dataLength = dataLength; |
| |
| trie->highStart = highStart; |
| // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes. |
| // Runtime code needs to then test for the real highStart as well. |
| trie->shifted12HighStart = (highStart + 0xfff) >> 12; |
| trie->type = type; |
| trie->valueWidth = valueWidth; |
| |
| trie->index3NullOffset = index3NullOffset; |
| trie->dataNullOffset = dataNullOffset; |
| trie->nullValue = initialValue; |
| |
| bytes += sizeof(UCPTrie); |
| |
| // Fill the index and data arrays. |
| uint16_t *dest16 = (uint16_t *)bytes; |
| trie->index = dest16; |
| |
| if (highStart <= fastLimit) { |
| // Condense only the fast index from the mutable-trie index. |
| for (int32_t i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) { |
| *dest16++ = (uint16_t)index[i]; // dest16[j] |
| } |
| } else { |
| uprv_memcpy(dest16, index16, indexLength * 2); |
| dest16 += indexLength; |
| } |
| bytes += indexLength * 2; |
| |
| // Write the data array. |
| const uint32_t *p = data; |
| switch (valueWidth) { |
| case UCPTRIE_VALUE_BITS_16: |
| // Write 16-bit data values. |
| trie->data.ptr16 = dest16; |
| for (int32_t i = dataLength; i > 0; --i) { |
| *dest16++ = (uint16_t)*p++; |
| } |
| break; |
| case UCPTRIE_VALUE_BITS_32: |
| // Write 32-bit data values. |
| trie->data.ptr32 = (uint32_t *)bytes; |
| uprv_memcpy(bytes, p, (size_t)dataLength * 4); |
| break; |
| case UCPTRIE_VALUE_BITS_8: |
| // Write 8-bit data values. |
| trie->data.ptr8 = bytes; |
| for (int32_t i = dataLength; i > 0; --i) { |
| *bytes++ = (uint8_t)*p++; |
| } |
| break; |
| default: |
| // Will not occur, valueWidth checked at the beginning. |
| break; |
| } |
| |
| #ifdef UCPTRIE_DEBUG |
| trie->name = name; |
| |
| ucptrie_printLengths(trie, ""); |
| #endif |
| |
| clear(); |
| return trie; |
| } |
| |
| } // namespace |
| |
| U_NAMESPACE_END |
| |
| U_NAMESPACE_USE |
| |
| U_CAPI UMutableCPTrie * U_EXPORT2 |
| umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) { |
| if (U_FAILURE(*pErrorCode)) { |
| return nullptr; |
| } |
| LocalPointer<MutableCodePointTrie> trie( |
| new MutableCodePointTrie(initialValue, errorValue, *pErrorCode), *pErrorCode); |
| if (U_FAILURE(*pErrorCode)) { |
| return nullptr; |
| } |
| return reinterpret_cast<UMutableCPTrie *>(trie.orphan()); |
| } |
| |
| U_CAPI UMutableCPTrie * U_EXPORT2 |
| umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode) { |
| if (U_FAILURE(*pErrorCode)) { |
| return nullptr; |
| } |
| if (other == nullptr) { |
| return nullptr; |
| } |
| LocalPointer<MutableCodePointTrie> clone( |
| new MutableCodePointTrie(*reinterpret_cast<const MutableCodePointTrie *>(other), *pErrorCode), *pErrorCode); |
| if (U_FAILURE(*pErrorCode)) { |
| return nullptr; |
| } |
| return reinterpret_cast<UMutableCPTrie *>(clone.orphan()); |
| } |
| |
| U_CAPI void U_EXPORT2 |
| umutablecptrie_close(UMutableCPTrie *trie) { |
| delete reinterpret_cast<MutableCodePointTrie *>(trie); |
| } |
| |
| U_CAPI UMutableCPTrie * U_EXPORT2 |
| umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode) { |
| if (U_FAILURE(*pErrorCode)) { |
| return nullptr; |
| } |
| if (map == nullptr) { |
| *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| return nullptr; |
| } |
| return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPMap(map, *pErrorCode)); |
| } |
| |
| U_CAPI UMutableCPTrie * U_EXPORT2 |
| umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) { |
| if (U_FAILURE(*pErrorCode)) { |
| return nullptr; |
| } |
| if (trie == nullptr) { |
| *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| return nullptr; |
| } |
| return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPTrie(trie, *pErrorCode)); |
| } |
| |
| U_CAPI uint32_t U_EXPORT2 |
| umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) { |
| return reinterpret_cast<const MutableCodePointTrie *>(trie)->get(c); |
| } |
| |
| namespace { |
| |
| UChar32 getRange(const void *trie, UChar32 start, |
| UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { |
| return reinterpret_cast<const MutableCodePointTrie *>(trie)-> |
| getRange(start, filter, context, pValue); |
| } |
| |
| } // namespace |
| |
| U_CAPI UChar32 U_EXPORT2 |
| umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start, |
| UCPMapRangeOption option, uint32_t surrogateValue, |
| UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { |
| return ucptrie_internalGetRange(getRange, trie, start, |
| option, surrogateValue, |
| filter, context, pValue); |
| } |
| |
| U_CAPI void U_EXPORT2 |
| umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) { |
| if (U_FAILURE(*pErrorCode)) { |
| return; |
| } |
| reinterpret_cast<MutableCodePointTrie *>(trie)->set(c, value, *pErrorCode); |
| } |
| |
| U_CAPI void U_EXPORT2 |
| umutablecptrie_setRange(UMutableCPTrie *trie, UChar32 start, UChar32 end, |
| uint32_t value, UErrorCode *pErrorCode) { |
| if (U_FAILURE(*pErrorCode)) { |
| return; |
| } |
| reinterpret_cast<MutableCodePointTrie *>(trie)->setRange(start, end, value, *pErrorCode); |
| } |
| |
| /* Compact and internally serialize the trie. */ |
| U_CAPI UCPTrie * U_EXPORT2 |
| umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth, |
| UErrorCode *pErrorCode) { |
| if (U_FAILURE(*pErrorCode)) { |
| return nullptr; |
| } |
| return reinterpret_cast<MutableCodePointTrie *>(trie)->build(type, valueWidth, *pErrorCode); |
| } |
| |
| #ifdef UCPTRIE_DEBUG |
| U_CFUNC void umutablecptrie_setName(UMutableCPTrie *trie, const char *name) { |
| reinterpret_cast<MutableCodePointTrie *>(trie)->name = name; |
| } |
| #endif |