|  | // © 2016 and later: Unicode, Inc. and others. | 
|  | // License & terms of use: http://www.unicode.org/copyright.html | 
|  | /* | 
|  | ******************************************************************************* | 
|  | * Copyright (C) 2013-2015, International Business Machines | 
|  | * Corporation and others.  All Rights Reserved. | 
|  | ******************************************************************************* | 
|  | * collationsettings.cpp | 
|  | * | 
|  | * created on: 2013feb07 | 
|  | * created by: Markus W. Scherer | 
|  | */ | 
|  |  | 
|  | #include "unicode/utypes.h" | 
|  |  | 
|  | #if !UCONFIG_NO_COLLATION | 
|  |  | 
|  | #if defined(STARBOARD) | 
|  | #include "starboard/client_porting/poem/assert_poem.h" | 
|  | #include "starboard/client_porting/poem/string_poem.h" | 
|  | #endif  // defined(STARBOARD) | 
|  | #include "unicode/ucol.h" | 
|  | #include "cmemory.h" | 
|  | #include "collation.h" | 
|  | #include "collationdata.h" | 
|  | #include "collationsettings.h" | 
|  | #include "sharedobject.h" | 
|  | #include "uassert.h" | 
|  | #include "umutex.h" | 
|  | #include "uvectr32.h" | 
|  |  | 
|  | U_NAMESPACE_BEGIN | 
|  |  | 
|  | CollationSettings::CollationSettings(const CollationSettings &other) | 
|  | : SharedObject(other), | 
|  | options(other.options), variableTop(other.variableTop), | 
|  | reorderTable(NULL), | 
|  | minHighNoReorder(other.minHighNoReorder), | 
|  | reorderRanges(NULL), reorderRangesLength(0), | 
|  | reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0), | 
|  | fastLatinOptions(other.fastLatinOptions) { | 
|  | UErrorCode errorCode = U_ZERO_ERROR; | 
|  | copyReorderingFrom(other, errorCode); | 
|  | if(fastLatinOptions >= 0) { | 
|  | uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries)); | 
|  | } | 
|  | } | 
|  |  | 
|  | CollationSettings::~CollationSettings() { | 
|  | if(reorderCodesCapacity != 0) { | 
|  | uprv_free(const_cast<int32_t *>(reorderCodes)); | 
|  | } | 
|  | } | 
|  |  | 
|  | UBool | 
|  | CollationSettings::operator==(const CollationSettings &other) const { | 
|  | if(options != other.options) { return FALSE; } | 
|  | if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return FALSE; } | 
|  | if(reorderCodesLength != other.reorderCodesLength) { return FALSE; } | 
|  | for(int32_t i = 0; i < reorderCodesLength; ++i) { | 
|  | if(reorderCodes[i] != other.reorderCodes[i]) { return FALSE; } | 
|  | } | 
|  | return TRUE; | 
|  | } | 
|  |  | 
|  | int32_t | 
|  | CollationSettings::hashCode() const { | 
|  | int32_t h = options << 8; | 
|  | if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; } | 
|  | h ^= reorderCodesLength; | 
|  | for(int32_t i = 0; i < reorderCodesLength; ++i) { | 
|  | h ^= (reorderCodes[i] << i); | 
|  | } | 
|  | return h; | 
|  | } | 
|  |  | 
|  | void | 
|  | CollationSettings::resetReordering() { | 
|  | // When we turn off reordering, we want to set a NULL permutation | 
|  | // rather than a no-op permutation. | 
|  | // Keep the memory via reorderCodes and its capacity. | 
|  | reorderTable = NULL; | 
|  | minHighNoReorder = 0; | 
|  | reorderRangesLength = 0; | 
|  | reorderCodesLength = 0; | 
|  | } | 
|  |  | 
|  | void | 
|  | CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length, | 
|  | const uint32_t *ranges, int32_t rangesLength, | 
|  | const uint8_t *table, UErrorCode &errorCode) { | 
|  | if(U_FAILURE(errorCode)) { return; } | 
|  | if(table != NULL && | 
|  | (rangesLength == 0 ? | 
|  | !reorderTableHasSplitBytes(table) : | 
|  | rangesLength >= 2 && | 
|  | // The first offset must be 0. The last offset must not be 0. | 
|  | (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) { | 
|  | // We need to release the memory before setting the alias pointer. | 
|  | if(reorderCodesCapacity != 0) { | 
|  | uprv_free(const_cast<int32_t *>(reorderCodes)); | 
|  | reorderCodesCapacity = 0; | 
|  | } | 
|  | reorderTable = table; | 
|  | reorderCodes = codes; | 
|  | reorderCodesLength = length; | 
|  | // Drop ranges before the first split byte. They are reordered by the table. | 
|  | // This then speeds up reordering of the remaining ranges. | 
|  | int32_t firstSplitByteRangeIndex = 0; | 
|  | while(firstSplitByteRangeIndex < rangesLength && | 
|  | (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) { | 
|  | // The second byte of the primary limit is 0. | 
|  | ++firstSplitByteRangeIndex; | 
|  | } | 
|  | if(firstSplitByteRangeIndex == rangesLength) { | 
|  | U_ASSERT(!reorderTableHasSplitBytes(table)); | 
|  | minHighNoReorder = 0; | 
|  | reorderRanges = NULL; | 
|  | reorderRangesLength = 0; | 
|  | } else { | 
|  | U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0); | 
|  | minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; | 
|  | reorderRanges = ranges + firstSplitByteRangeIndex; | 
|  | reorderRangesLength = rangesLength - firstSplitByteRangeIndex; | 
|  | } | 
|  | return; | 
|  | } | 
|  | // Regenerate missing data. | 
|  | setReordering(data, codes, length, errorCode); | 
|  | } | 
|  |  | 
|  | void | 
|  | CollationSettings::setReordering(const CollationData &data, | 
|  | const int32_t *codes, int32_t codesLength, | 
|  | UErrorCode &errorCode) { | 
|  | if(U_FAILURE(errorCode)) { return; } | 
|  | if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) { | 
|  | resetReordering(); | 
|  | return; | 
|  | } | 
|  | UVector32 rangesList(errorCode); | 
|  | data.makeReorderRanges(codes, codesLength, rangesList, errorCode); | 
|  | if(U_FAILURE(errorCode)) { return; } | 
|  | int32_t rangesLength = rangesList.size(); | 
|  | if(rangesLength == 0) { | 
|  | resetReordering(); | 
|  | return; | 
|  | } | 
|  | const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer()); | 
|  | // ranges[] contains at least two (limit, offset) pairs. | 
|  | // The first offset must be 0. The last offset must not be 0. | 
|  | // Separators (at the low end) and trailing weights (at the high end) | 
|  | // are never reordered. | 
|  | U_ASSERT(rangesLength >= 2); | 
|  | U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0); | 
|  | minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; | 
|  |  | 
|  | // Write the lead byte permutation table. | 
|  | // Set a 0 for each lead byte that has a range boundary in the middle. | 
|  | uint8_t table[256]; | 
|  | int32_t b = 0; | 
|  | int32_t firstSplitByteRangeIndex = -1; | 
|  | for(int32_t i = 0; i < rangesLength; ++i) { | 
|  | uint32_t pair = ranges[i]; | 
|  | int32_t limit1 = (int32_t)(pair >> 24); | 
|  | while(b < limit1) { | 
|  | table[b] = (uint8_t)(b + pair); | 
|  | ++b; | 
|  | } | 
|  | // Check the second byte of the limit. | 
|  | if((pair & 0xff0000) != 0) { | 
|  | table[limit1] = 0; | 
|  | b = limit1 + 1; | 
|  | if(firstSplitByteRangeIndex < 0) { | 
|  | firstSplitByteRangeIndex = i; | 
|  | } | 
|  | } | 
|  | } | 
|  | while(b <= 0xff) { | 
|  | table[b] = (uint8_t)b; | 
|  | ++b; | 
|  | } | 
|  | if(firstSplitByteRangeIndex < 0) { | 
|  | // The lead byte permutation table alone suffices for reordering. | 
|  | rangesLength = 0; | 
|  | } else { | 
|  | // Remove the ranges below the first split byte. | 
|  | ranges += firstSplitByteRangeIndex; | 
|  | rangesLength -= firstSplitByteRangeIndex; | 
|  | } | 
|  | setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode); | 
|  | } | 
|  |  | 
|  | void | 
|  | CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength, | 
|  | const uint32_t *ranges, int32_t rangesLength, | 
|  | const uint8_t *table, UErrorCode &errorCode) { | 
|  | if(U_FAILURE(errorCode)) { return; } | 
|  | int32_t *ownedCodes; | 
|  | int32_t totalLength = codesLength + rangesLength; | 
|  | U_ASSERT(totalLength > 0); | 
|  | if(totalLength <= reorderCodesCapacity) { | 
|  | ownedCodes = const_cast<int32_t *>(reorderCodes); | 
|  | } else { | 
|  | // Allocate one memory block for the codes, the ranges, and the 16-aligned table. | 
|  | int32_t capacity = (totalLength + 3) & ~3;  // round up to a multiple of 4 ints | 
|  | ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256); | 
|  | if(ownedCodes == NULL) { | 
|  | resetReordering(); | 
|  | errorCode = U_MEMORY_ALLOCATION_ERROR; | 
|  | return; | 
|  | } | 
|  | if(reorderCodesCapacity != 0) { | 
|  | uprv_free(const_cast<int32_t *>(reorderCodes)); | 
|  | } | 
|  | reorderCodes = ownedCodes; | 
|  | reorderCodesCapacity = capacity; | 
|  | } | 
|  | uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256); | 
|  | uprv_memcpy(ownedCodes, codes, codesLength * 4); | 
|  | uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4); | 
|  | reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity); | 
|  | reorderCodesLength = codesLength; | 
|  | reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength; | 
|  | reorderRangesLength = rangesLength; | 
|  | } | 
|  |  | 
|  | void | 
|  | CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) { | 
|  | if(U_FAILURE(errorCode)) { return; } | 
|  | if(!other.hasReordering()) { | 
|  | resetReordering(); | 
|  | return; | 
|  | } | 
|  | minHighNoReorder = other.minHighNoReorder; | 
|  | if(other.reorderCodesCapacity == 0) { | 
|  | // The reorder arrays are aliased to memory-mapped data. | 
|  | reorderTable = other.reorderTable; | 
|  | reorderRanges = other.reorderRanges; | 
|  | reorderRangesLength = other.reorderRangesLength; | 
|  | reorderCodes = other.reorderCodes; | 
|  | reorderCodesLength = other.reorderCodesLength; | 
|  | } else { | 
|  | setReorderArrays(other.reorderCodes, other.reorderCodesLength, | 
|  | other.reorderRanges, other.reorderRangesLength, | 
|  | other.reorderTable, errorCode); | 
|  | } | 
|  | } | 
|  |  | 
|  | UBool | 
|  | CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) { | 
|  | U_ASSERT(table[0] == 0); | 
|  | for(int32_t i = 1; i < 256; ++i) { | 
|  | if(table[i] == 0) { | 
|  | return TRUE; | 
|  | } | 
|  | } | 
|  | return FALSE; | 
|  | } | 
|  |  | 
|  | uint32_t | 
|  | CollationSettings::reorderEx(uint32_t p) const { | 
|  | if(p >= minHighNoReorder) { return p; } | 
|  | // Round up p so that its lower 16 bits are >= any offset bits. | 
|  | // Then compare q directly with (limit, offset) pairs. | 
|  | uint32_t q = p | 0xffff; | 
|  | uint32_t r; | 
|  | const uint32_t *ranges = reorderRanges; | 
|  | while(q >= (r = *ranges)) { ++ranges; } | 
|  | return p + (r << 24); | 
|  | } | 
|  |  | 
|  | void | 
|  | CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) { | 
|  | if(U_FAILURE(errorCode)) { return; } | 
|  | int32_t noStrength = options & ~STRENGTH_MASK; | 
|  | switch(value) { | 
|  | case UCOL_PRIMARY: | 
|  | case UCOL_SECONDARY: | 
|  | case UCOL_TERTIARY: | 
|  | case UCOL_QUATERNARY: | 
|  | case UCOL_IDENTICAL: | 
|  | options = noStrength | (value << STRENGTH_SHIFT); | 
|  | break; | 
|  | case UCOL_DEFAULT: | 
|  | options = noStrength | (defaultOptions & STRENGTH_MASK); | 
|  | break; | 
|  | default: | 
|  | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | void | 
|  | CollationSettings::setFlag(int32_t bit, UColAttributeValue value, | 
|  | int32_t defaultOptions, UErrorCode &errorCode) { | 
|  | if(U_FAILURE(errorCode)) { return; } | 
|  | switch(value) { | 
|  | case UCOL_ON: | 
|  | options |= bit; | 
|  | break; | 
|  | case UCOL_OFF: | 
|  | options &= ~bit; | 
|  | break; | 
|  | case UCOL_DEFAULT: | 
|  | options = (options & ~bit) | (defaultOptions & bit); | 
|  | break; | 
|  | default: | 
|  | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | void | 
|  | CollationSettings::setCaseFirst(UColAttributeValue value, | 
|  | int32_t defaultOptions, UErrorCode &errorCode) { | 
|  | if(U_FAILURE(errorCode)) { return; } | 
|  | int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK; | 
|  | switch(value) { | 
|  | case UCOL_OFF: | 
|  | options = noCaseFirst; | 
|  | break; | 
|  | case UCOL_LOWER_FIRST: | 
|  | options = noCaseFirst | CASE_FIRST; | 
|  | break; | 
|  | case UCOL_UPPER_FIRST: | 
|  | options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK; | 
|  | break; | 
|  | case UCOL_DEFAULT: | 
|  | options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK); | 
|  | break; | 
|  | default: | 
|  | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | void | 
|  | CollationSettings::setAlternateHandling(UColAttributeValue value, | 
|  | int32_t defaultOptions, UErrorCode &errorCode) { | 
|  | if(U_FAILURE(errorCode)) { return; } | 
|  | int32_t noAlternate = options & ~ALTERNATE_MASK; | 
|  | switch(value) { | 
|  | case UCOL_NON_IGNORABLE: | 
|  | options = noAlternate; | 
|  | break; | 
|  | case UCOL_SHIFTED: | 
|  | options = noAlternate | SHIFTED; | 
|  | break; | 
|  | case UCOL_DEFAULT: | 
|  | options = noAlternate | (defaultOptions & ALTERNATE_MASK); | 
|  | break; | 
|  | default: | 
|  | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | void | 
|  | CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) { | 
|  | if(U_FAILURE(errorCode)) { return; } | 
|  | int32_t noMax = options & ~MAX_VARIABLE_MASK; | 
|  | switch(value) { | 
|  | case MAX_VAR_SPACE: | 
|  | case MAX_VAR_PUNCT: | 
|  | case MAX_VAR_SYMBOL: | 
|  | case MAX_VAR_CURRENCY: | 
|  | options = noMax | (value << MAX_VARIABLE_SHIFT); | 
|  | break; | 
|  | case UCOL_DEFAULT: | 
|  | options = noMax | (defaultOptions & MAX_VARIABLE_MASK); | 
|  | break; | 
|  | default: | 
|  | errorCode = U_ILLEGAL_ARGUMENT_ERROR; | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | U_NAMESPACE_END | 
|  |  | 
|  | #endif  // !UCONFIG_NO_COLLATION |