// third_party/icu/source/i18n/collationsets.cpp - cobalt - Git at Google

 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
 /*
 *******************************************************************************
 * Copyright (C) 2013-2014, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * collationsets.cpp
 *
 * created on: 2013feb09
 * created by: Markus W. Scherer
 */

 #include "unicode/utypes.h"

 #if !UCONFIG_NO_COLLATION

 #if defined(STARBOARD)
 #include "starboard/client_porting/poem/assert_poem.h"
 #include "starboard/client_porting/poem/string_poem.h"
 #endif  // defined(STARBOARD)
 #include "unicode/ucharstrie.h"
 #include "unicode/uniset.h"
 #include "unicode/unistr.h"
 #include "unicode/ustringtrie.h"
 #include "collation.h"
 #include "collationdata.h"
 #include "collationsets.h"
 #include "normalizer2impl.h"
 #include "uassert.h"
 #include "utf16collationiterator.h"
 #include "utrie2.h"

 U_NAMESPACE_BEGIN

 U_CDECL_BEGIN

 /**
  * Range callback handed to utrie2_enum() by TailoredSet::forData().
  * Ranges whose value is the fallback CE32 are skipped (TRUE is returned);
  * every other range is forwarded to TailoredSet::handleCE32() and that
  * function's result is returned.
  */
 static UBool U_CALLCONV
 enumTailoredRange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) {
     if(ce32 != Collation::FALLBACK_CE32) {
         TailoredSet *set = const_cast<TailoredSet *>(static_cast<const TailoredSet *>(context));
         return set->handleCE32(start, end, ce32);
     }
     // The range falls back to the base data, so it is not tailored.
     return TRUE;
 }

 U_CDECL_END

 void
 TailoredSet::forData(const CollationData *d, UErrorCode &ec) {
     if(U_FAILURE(ec)) { return; }
     errorCode = ec;  // Preserve info & warning codes.
     data = d;
     baseData = d->base;
     U_ASSERT(baseData != NULL);
     utrie2_enum(data->trie, NULL, enumTailoredRange, this);
     ec = errorCode;
 }

 /**
  * Compares the tailoring mapping ce32, shared by all code points in
  * [start, end], with the base mapping of each of those code points.
  * @return U_SUCCESS(errorCode), so that the trie enumeration stops after a failure.
  */
 UBool
 TailoredSet::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {
     U_ASSERT(ce32 != Collation::FALLBACK_CE32);
     if(Collation::isSpecialCE32(ce32)) {
         ce32 = data->getIndirectCE32(ce32);
         if(ce32 == Collation::FALLBACK_CE32) {
             return U_SUCCESS(errorCode);
         }
     }
     for(UChar32 c = start;; ++c) {
         const uint32_t baseCE32 = baseData->getFinalCE32(baseData->getCE32(c));
         // Equal CE32 values are not sufficient for "not tailored":
         // contractions and expansions in different data objects
         // normally differ even if they have the same data offsets.
         if(!Collation::isSelfContainedCE32(ce32) || !Collation::isSelfContainedCE32(baseCE32)) {
             compare(c, ce32, baseCE32);
         } else if(ce32 != baseCE32) {
             // Fast path: both values are self-contained and they differ.
             tailored->add(c);
         }
         if(c >= end) { break; }
     }
     return U_SUCCESS(errorCode);
 }

 /**
  * Compares the tailoring mapping ce32 of code point c with the base mapping
  * baseCE32 and records differences in the tailored set.
  *
  * The work is done in three stages:
  * 1. Prefix (pre-context) data: prefixes present on both sides are compared
  *    pairwise; prefixes present on only one side are added.
  * 2. Contraction data: handled the same way for suffixes.
  * 3. The remaining non-contextual CE32 values are compared by tag and,
  *    for expansions, element by element.
  *
  * @param c        the code point whose mappings are compared
  * @param ce32     the mapping from the tailoring data
  * @param baseCE32 the mapping from the base data
  */
 void
 TailoredSet::compare(UChar32 c, uint32_t ce32, uint32_t baseCE32) {
     // Stage 1: prefixes. In each case the CE32 is replaced by the default
     // value stored at the start of its context data.
     if(Collation::isPrefixCE32(ce32)) {
         const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
         ce32 = data->getFinalCE32(CollationData::readCE32(p));
         if(Collation::isPrefixCE32(baseCE32)) {
             const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
             baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
             // NOTE(review): "+ 2" presumably skips the default CE32 just read — confirm.
             comparePrefixes(c, p + 2, q + 2);
         } else {
             // Only the tailoring has prefixes: all of them are tailored.
             addPrefixes(data, c, p + 2);
         }
     } else if(Collation::isPrefixCE32(baseCE32)) {
         // Only the base has prefixes: all of them count as differences.
         const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
         baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
         addPrefixes(baseData, c, q + 2);
     }

     // Stage 2: contractions, handled analogously to the prefixes above.
     if(Collation::isContractionCE32(ce32)) {
         const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
         if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
             ce32 = Collation::NO_CE32;
         } else {
             ce32 = data->getFinalCE32(CollationData::readCE32(p));
         }
         if(Collation::isContractionCE32(baseCE32)) {
             const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
             if((baseCE32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
                 baseCE32 = Collation::NO_CE32;
             } else {
                 baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
             }
             compareContractions(c, p + 2, q + 2);
         } else {
             addContractions(c, p + 2);
         }
     } else if(Collation::isContractionCE32(baseCE32)) {
         // NOTE(review): unlike the branch above, CONTRACT_SINGLE_CP_NO_MATCH
         // is not checked here before reading the default CE32 — confirm intended.
         const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
         baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
         addContractions(c, q + 2);
     }

     // Stage 3: determine the tags of the remaining values; -1 means "not special".
     int32_t tag;
     if(Collation::isSpecialCE32(ce32)) {
         tag = Collation::tagFromCE32(ce32);
         U_ASSERT(tag != Collation::PREFIX_TAG);
         U_ASSERT(tag != Collation::CONTRACTION_TAG);
         // Currently, the tailoring data builder does not write offset tags.
         // They might be useful for saving space,
         // but they would complicate the builder,
         // and in tailorings we assume that performance of tailored characters is more important.
         U_ASSERT(tag != Collation::OFFSET_TAG);
     } else {
         tag = -1;
     }
     int32_t baseTag;
     if(Collation::isSpecialCE32(baseCE32)) {
         baseTag = Collation::tagFromCE32(baseCE32);
         U_ASSERT(baseTag != Collation::PREFIX_TAG);
         U_ASSERT(baseTag != Collation::CONTRACTION_TAG);
     } else {
         baseTag = -1;
     }

     // Non-contextual mappings, expansions, etc.
     if(baseTag == Collation::OFFSET_TAG) {
         // We might be comparing a tailoring CE which is a copy of
         // a base offset-tag CE, via the [optimize [set]] syntax
         // or when a single-character mapping was copied for tailored contractions.
         // Offset tags always result in long-primary CEs,
         // with common secondary/tertiary weights.
         if(!Collation::isLongPrimaryCE32(ce32)) {
             add(c);
             return;
         }
         int64_t dataCE = baseData->ces[Collation::indexFromCE32(baseCE32)];
         uint32_t p = Collation::getThreeBytePrimaryForOffsetData(c, dataCE);
         if(Collation::primaryFromLongPrimaryCE32(ce32) != p) {
             add(c);
             return;
         }
     }

     // Different kinds of mappings always count as a difference.
     if(tag != baseTag) {
         add(c);
         return;
     }

     if(tag == Collation::EXPANSION32_TAG) {
         // Same tag on both sides: compare the CE32 sequences.
         const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32);
         int32_t length = Collation::lengthFromCE32(ce32);

         const uint32_t *baseCE32s = baseData->ce32s + Collation::indexFromCE32(baseCE32);
         int32_t baseLength = Collation::lengthFromCE32(baseCE32);

         if(length != baseLength) {
             add(c);
             return;
         }
         for(int32_t i = 0; i < length; ++i) {
             if(ce32s[i] != baseCE32s[i]) {
                 add(c);
                 break;
             }
         }
     } else if(tag == Collation::EXPANSION_TAG) {
         // Same tag on both sides: compare the 64-bit CE sequences.
         const int64_t *ces = data->ces + Collation::indexFromCE32(ce32);
         int32_t length = Collation::lengthFromCE32(ce32);

         const int64_t *baseCEs = baseData->ces + Collation::indexFromCE32(baseCE32);
         int32_t baseLength = Collation::lengthFromCE32(baseCE32);

         if(length != baseLength) {
             add(c);
             return;
         }
         for(int32_t i = 0; i < length; ++i) {
             if(ces[i] != baseCEs[i]) {
                 add(c);
                 break;
             }
         }
     } else if(tag == Collation::HANGUL_TAG) {
         // A Hangul syllable is added if any of its decomposed jamos
         // is already in the tailored set.
         UChar jamos[3];
         int32_t length = Hangul::decompose(c, jamos);
         if(tailored->contains(jamos[0]) || tailored->contains(jamos[1]) ||
                 (length == 3 && tailored->contains(jamos[2]))) {
             add(c);
         }
     } else if(ce32 != baseCE32) {
         add(c);
     }
 }

 /**
  * Walks the prefix tries of the tailoring (p) and of the base (q) in parallel.
  * A prefix found on only one side is added via addPrefix(); for a prefix found
  * on both sides the associated values are compared via compare().
  */
 void
 TailoredSet::comparePrefixes(UChar32 c, const UChar *p, const UChar *q) {
     UCharsTrie::Iterator tailoringIter(p, 0, errorCode);
     UCharsTrie::Iterator baseIter(q, 0, errorCode);
     // End-of-iteration sentinel: a string with a single U+FFFF.
     // U+FFFF is untailorable and will not occur in prefixes.
     const UnicodeString sentinel((UChar)0xffff);
     const UnicodeString *tailoringPrefix = NULL;
     const UnicodeString *basePrefix = NULL;
     for(;;) {
         // Refill whichever side was consumed in the previous round.
         if(tailoringPrefix == NULL) {
             tailoringPrefix = tailoringIter.next(errorCode) ? &tailoringIter.getString() : &sentinel;
         }
         if(basePrefix == NULL) {
             basePrefix = baseIter.next(errorCode) ? &baseIter.getString() : &sentinel;
         }
         if(tailoringPrefix == &sentinel && basePrefix == &sentinel) {
             return;
         }
         const int32_t order = tailoringPrefix->compare(*basePrefix);
         if(order == 0) {
             // The prefix exists on both sides: compare its mappings in context.
             setPrefix(*tailoringPrefix);
             compare(c, (uint32_t)tailoringIter.getValue(), (uint32_t)baseIter.getValue());
             resetPrefix();
             tailoringPrefix = NULL;
             basePrefix = NULL;
         } else if(order < 0) {
             // The prefix occurs in the tailoring but not in the base.
             addPrefix(data, *tailoringPrefix, c, (uint32_t)tailoringIter.getValue());
             tailoringPrefix = NULL;
         } else {
             // The prefix occurs in the base but not in the tailoring.
             addPrefix(baseData, *basePrefix, c, (uint32_t)baseIter.getValue());
             basePrefix = NULL;
         }
     }
 }

 /**
  * Walks the contraction-suffix tries of the tailoring (p) and of the base (q)
  * in parallel. A suffix found on only one side is added via addSuffix(); for a
  * suffix found on both sides the associated values are compared via compare().
  */
 void
 TailoredSet::compareContractions(UChar32 c, const UChar *p, const UChar *q) {
     UCharsTrie::Iterator tailoringIter(p, 0, errorCode);
     UCharsTrie::Iterator baseIter(q, 0, errorCode);
     // End-of-iteration sentinel: a string with two U+FFFF.
     // U+FFFF is untailorable and will not occur in contractions except maybe
     // as a single suffix character for a root-collator boundary contraction.
     UnicodeString sentinel((UChar)0xffff);
     sentinel.append((UChar)0xffff);
     const UnicodeString *tailoringSuffix = NULL;
     const UnicodeString *baseSuffix = NULL;
     for(;;) {
         // Refill whichever side was consumed in the previous round.
         if(tailoringSuffix == NULL) {
             tailoringSuffix = tailoringIter.next(errorCode) ? &tailoringIter.getString() : &sentinel;
         }
         if(baseSuffix == NULL) {
             baseSuffix = baseIter.next(errorCode) ? &baseIter.getString() : &sentinel;
         }
         if(tailoringSuffix == &sentinel && baseSuffix == &sentinel) {
             return;
         }
         const int32_t order = tailoringSuffix->compare(*baseSuffix);
         if(order == 0) {
             // The suffix exists on both sides: compare its mappings in context.
             suffix = tailoringSuffix;
             compare(c, (uint32_t)tailoringIter.getValue(), (uint32_t)baseIter.getValue());
             suffix = NULL;
             tailoringSuffix = NULL;
             baseSuffix = NULL;
         } else if(order < 0) {
             // The suffix occurs in the tailoring but not in the base.
             addSuffix(c, *tailoringSuffix);
             tailoringSuffix = NULL;
         } else {
             // The suffix occurs in the base but not in the tailoring.
             addSuffix(c, *baseSuffix);
             baseSuffix = NULL;
         }
     }
 }

 void
 TailoredSet::addPrefixes(const CollationData *d, UChar32 c, const UChar *p) {
     UCharsTrie::Iterator prefixes(p, 0, errorCode);
     while(prefixes.next(errorCode)) {
         addPrefix(d, prefixes.getString(), c, (uint32_t)prefixes.getValue());
     }
 }

 void
 TailoredSet::addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32) {
     setPrefix(pfx);
     ce32 = d->getFinalCE32(ce32);
     if(Collation::isContractionCE32(ce32)) {
         const UChar *p = d->contexts + Collation::indexFromCE32(ce32);
         addContractions(c, p + 2);
     }
     tailored->add(UnicodeString(unreversedPrefix).append(c));
     resetPrefix();
 }

 /**
  * Adds one string per suffix stored in the trie at p, via addSuffix().
  */
 void
 TailoredSet::addContractions(UChar32 c, const UChar *p) {
     for(UCharsTrie::Iterator iter(p, 0, errorCode); iter.next(errorCode);) {
         addSuffix(c, iter.getString());
     }
 }

 /**
  * Adds the string "current prefix + c + sfx" to the tailored set.
  */
 void
 TailoredSet::addSuffix(UChar32 c, const UnicodeString &sfx) {
     UnicodeString s(unreversedPrefix);
     s.append(c);
     s.append(sfx);
     tailored->add(s);
 }

 /**
  * Adds c to the tailored set: as a single code point when no prefix or suffix
  * is in effect, otherwise as the string "prefix + c + suffix".
  */
 void
 TailoredSet::add(UChar32 c) {
     if(suffix == NULL && unreversedPrefix.isEmpty()) {
         tailored->add(c);
         return;
     }
     UnicodeString contextString(unreversedPrefix);
     contextString.append(c);
     if(suffix != NULL) {
         contextString.append(*suffix);
     }
     tailored->add(contextString);
 }

 // Out-of-line definition of the CESink destructor; it has no work to do.
 ContractionsAndExpansions::CESink::~CESink() {}

 U_CDECL_BEGIN

 /**
  * Range callback handed to utrie2_enum() by ContractionsAndExpansions::forData().
  * Its behavior depends on cne->checkTailored:
  *   0  : there is no tailoring; every range is handled.
  *   <0 : tailoring pass; non-fallback ranges are recorded in cne->tailored and handled.
  *   >0 : base pass; code points already recorded in cne->tailored are excluded.
  * Returns TRUE for skipped ranges, otherwise U_SUCCESS(cne->errorCode).
  */
 static UBool U_CALLCONV
 enumCnERange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) {
     ContractionsAndExpansions *cne =
         const_cast<ContractionsAndExpansions *>(
             static_cast<const ContractionsAndExpansions *>(context));
     if(cne->checkTailored < 0) {
         // Collect the set of code points with mappings in the tailoring data.
         if(ce32 == Collation::FALLBACK_CE32) {
             return TRUE;  // fallback to base, not tailored
         }
         cne->tailored.add(start, end);
     } else if(cne->checkTailored > 0) {
         // Exclude tailored ranges from the base data enumeration.
         if(start == end) {
             if(cne->tailored.contains(start)) {
                 return TRUE;
             }
         } else if(cne->tailored.containsSome(start, end)) {
             // Handle only the untailored sub-ranges of [start, end].
             cne->ranges.set(start, end).removeAll(cne->tailored);
             const int32_t rangeCount = cne->ranges.getRangeCount();
             for(int32_t r = 0; r < rangeCount; ++r) {
                 cne->handleCE32(cne->ranges.getRangeStart(r), cne->ranges.getRangeEnd(r), ce32);
             }
             return U_SUCCESS(cne->errorCode);
         }
     }
     // checkTailored == 0, or the range needs no splitting.
     cne->handleCE32(start, end, ce32);
     return U_SUCCESS(cne->errorCode);
 }

 U_CDECL_END

 void
 ContractionsAndExpansions::forData(const CollationData *d, UErrorCode &ec) {
     if(U_FAILURE(ec)) { return; }
     errorCode = ec;  // Preserve info & warning codes.
     // Add all from the data, can be tailoring or base.
     if(d->base != NULL) {
         checkTailored = -1;
     }
     data = d;
     utrie2_enum(data->trie, NULL, enumCnERange, this);
     if(d->base == NULL || U_FAILURE(errorCode)) {
         ec = errorCode;
         return;
     }
     // Add all from the base data but only for un-tailored code points.
     tailored.freeze();
     checkTailored = 1;
     data = d->base;
     utrie2_enum(data->trie, NULL, enumCnERange, this);
     ec = errorCode;
 }

 void
 ContractionsAndExpansions::forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec) {
     if(U_FAILURE(ec)) { return; }
     errorCode = ec;  // Preserve info & warning codes.
     uint32_t ce32 = d->getCE32(c);
     if(ce32 == Collation::FALLBACK_CE32) {
         d = d->base;
         ce32 = d->getCE32(c);
     }
     data = d;
     handleCE32(c, c, ce32);
     ec = errorCode;
 }

 /**
  * Processes the mapping ce32 that applies to all code points in [start, end].
  *
  * CEs and expansions are reported to the sink (if one is set), and code points
  * or strings with expansions are recorded via addExpansions(). Prefix and
  * contraction mappings are delegated to handlePrefixes()/handleContractions().
  * For DIGIT_TAG and U0000_TAG, ce32 is replaced by another CE32 and the loop
  * runs again; every other case returns.
  *
  * Unexpected tags set errorCode to U_INTERNAL_PROGRAM_ERROR unless it already
  * indicates a failure.
  */
 void
 ContractionsAndExpansions::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {
     for(;;) {
         if((ce32 & 0xff) < Collation::SPECIAL_CE32_LOW_BYTE) {
             // !isSpecialCE32()
             if(sink != NULL) {
                 sink->handleCE(Collation::ceFromSimpleCE32(ce32));
             }
             return;
         }
         switch(Collation::tagFromCE32(ce32)) {
         case Collation::FALLBACK_TAG:
             return;
         case Collation::RESERVED_TAG_3:
         case Collation::BUILDER_DATA_TAG:
         case Collation::LEAD_SURROGATE_TAG:
             // These tags are treated as an internal error here;
             // an earlier failure code is not overwritten.
             if(U_SUCCESS(errorCode)) { errorCode = U_INTERNAL_PROGRAM_ERROR; }
             return;
         case Collation::LONG_PRIMARY_TAG:
             if(sink != NULL) {
                 sink->handleCE(Collation::ceFromLongPrimaryCE32(ce32));
             }
             return;
         case Collation::LONG_SECONDARY_TAG:
             if(sink != NULL) {
                 sink->handleCE(Collation::ceFromLongSecondaryCE32(ce32));
             }
             return;
         case Collation::LATIN_EXPANSION_TAG:
             if(sink != NULL) {
                 // Two CEs are unpacked from the CE32 itself.
                 ces[0] = Collation::latinCE0FromCE32(ce32);
                 ces[1] = Collation::latinCE1FromCE32(ce32);
                 sink->handleExpansion(ces, 2);
             }
             // Optimization: If we have a prefix,
             // then the relevant strings have been added already.
             if(unreversedPrefix.isEmpty()) {
                 addExpansions(start, end);
             }
             return;
         case Collation::EXPANSION32_TAG:
             if(sink != NULL) {
                 // Convert the stored CE32 sequence into CEs in the ces buffer.
                 const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32);
                 int32_t length = Collation::lengthFromCE32(ce32);
                 for(int32_t i = 0; i < length; ++i) {
                     ces[i] = Collation::ceFromCE32(*ce32s++);
                 }
                 sink->handleExpansion(ces, length);
             }
             // Optimization: If we have a prefix,
             // then the relevant strings have been added already.
             if(unreversedPrefix.isEmpty()) {
                 addExpansions(start, end);
             }
             return;
         case Collation::EXPANSION_TAG:
             if(sink != NULL) {
                 // The CEs are passed to the sink directly from the data.
                 int32_t length = Collation::lengthFromCE32(ce32);
                 sink->handleExpansion(data->ces + Collation::indexFromCE32(ce32), length);
             }
             // Optimization: If we have a prefix,
             // then the relevant strings have been added already.
             if(unreversedPrefix.isEmpty()) {
                 addExpansions(start, end);
             }
             return;
         case Collation::PREFIX_TAG:
             handlePrefixes(start, end, ce32);
             return;
         case Collation::CONTRACTION_TAG:
             handleContractions(start, end, ce32);
             return;
         case Collation::DIGIT_TAG:
             // Fetch the non-numeric-collation CE32 and continue.
             ce32 = data->ce32s[Collation::indexFromCE32(ce32)];
             break;
         case Collation::U0000_TAG:
             U_ASSERT(start == 0 && end == 0);
             // Fetch the normal ce32 for U+0000 and continue.
             ce32 = data->ce32s[0];
             break;
         case Collation::HANGUL_TAG:
             if(sink != NULL) {
                 // TODO: This should be optimized,
                 // especially if [start..end] is the complete Hangul range. (assert that)
                 // Each syllable's CEs are fetched with a collation iterator
                 // over a one-character string.
                 UTF16CollationIterator iter(data, FALSE, NULL, NULL, NULL);
                 UChar hangul[1] = { 0 };
                 for(UChar32 c = start; c <= end; ++c) {
                     hangul[0] = (UChar)c;
                     iter.setText(hangul, hangul + 1);
                     int32_t length = iter.fetchCEs(errorCode);
                     if(U_FAILURE(errorCode)) { return; }
                     // Ignore the terminating non-CE.
                     U_ASSERT(length >= 2 && iter.getCE(length - 1) == Collation::NO_CE);
                     sink->handleExpansion(iter.getCEs(), length - 1);
                 }
             }
             // Optimization: If we have a prefix,
             // then the relevant strings have been added already.
             if(unreversedPrefix.isEmpty()) {
                 addExpansions(start, end);
             }
             return;
         case Collation::OFFSET_TAG:
             // Currently no need to send offset CEs to the sink.
             return;
         case Collation::IMPLICIT_TAG:
             // Currently no need to send implicit CEs to the sink.
             return;
         }
     }
 }

 /**
  * Handles a prefix mapping for [start, end]: first the default mapping that
  * applies when no prefix matches, then, if addPrefixes is set, every prefix
  * stored in the trie together with its mapping.
  */
 void
 ContractionsAndExpansions::handlePrefixes(
         UChar32 start, UChar32 end, uint32_t ce32) {
     const UChar *entry = data->contexts + Collation::indexFromCE32(ce32);
     // Default if no prefix match.
     handleCE32(start, end, CollationData::readCE32(entry));
     if(addPrefixes) {
         UCharsTrie::Iterator iter(entry + 2, 0, errorCode);
         while(iter.next(errorCode)) {
             setPrefix(iter.getString());
             // Prefix/pre-context mappings are special kinds of contractions
             // that always yield expansions.
             addStrings(start, end, contractions);
             addStrings(start, end, expansions);
             handleCE32(start, end, (uint32_t)iter.getValue());
         }
         resetPrefix();
     }
 }

 /**
  * Handles a contraction mapping for [start, end]: first the default mapping
  * that applies when no suffix matches (if there is one), then every suffix
  * stored in the trie together with its mapping.
  */
 void
 ContractionsAndExpansions::handleContractions(
         UChar32 start, UChar32 end, uint32_t ce32) {
     const UChar *entry = data->contexts + Collation::indexFromCE32(ce32);
     if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) == 0) {
         // Default if no suffix match.
         const uint32_t defaultCE32 = CollationData::readCE32(entry);
         U_ASSERT(!Collation::isContractionCE32(defaultCE32));
         handleCE32(start, end, defaultCE32);
     } else {
         // No match on the single code point.
         // We are underneath a prefix, and the default mapping is just
         // a fallback to the mappings for a shorter prefix.
         U_ASSERT(!unreversedPrefix.isEmpty());
     }
     UCharsTrie::Iterator iter(entry + 2, 0, errorCode);
     while(iter.next(errorCode)) {
         suffix = &iter.getString();
         addStrings(start, end, contractions);
         const UBool underPrefix = !unreversedPrefix.isEmpty();
         if(underPrefix) {
             addStrings(start, end, expansions);
         }
         handleCE32(start, end, (uint32_t)iter.getValue());
     }
     suffix = NULL;
 }

 /**
  * Records [start, end] as having expansions: as a code point range when no
  * prefix or suffix is in effect, otherwise as context strings via addStrings().
  */
 void
 ContractionsAndExpansions::addExpansions(UChar32 start, UChar32 end) {
     if(suffix != NULL || !unreversedPrefix.isEmpty()) {
         addStrings(start, end, expansions);
         return;
     }
     if(expansions != NULL) {
         expansions->add(start, end);
     }
 }

 /**
  * For each code point c in [start, end], adds the string
  * "current prefix + c + current suffix (if any)" to set.
  * Does nothing if set is NULL.
  */
 void
 ContractionsAndExpansions::addStrings(UChar32 start, UChar32 end, UnicodeSet *set) {
     if(set == NULL) { return; }
     const int32_t prefixLength = unreversedPrefix.length();
     UnicodeString s(unreversedPrefix);
     for(UChar32 c = start;; ++c) {
         s.append(c);
         if(suffix != NULL) {
             s.append(*suffix);
         }
         set->add(s);
         // Cut the string back to the bare prefix for the next code point.
         s.truncate(prefixLength);
         if(c >= end) { break; }
     }
 }

 U_NAMESPACE_END

 #endif  // !UCONFIG_NO_COLLATION
	// © 2016 and later: Unicode, Inc. and others.
	// License & terms of use: http://www.unicode.org/copyright.html
	/*
	*******************************************************************************
	* Copyright (C) 2013-2014, International Business Machines
	* Corporation and others. All Rights Reserved.
	*******************************************************************************
	* collationsets.cpp
	*
	* created on: 2013feb09
	* created by: Markus W. Scherer
	*/

	#include "unicode/utypes.h"

	#if !UCONFIG_NO_COLLATION

	#if defined(STARBOARD)
	#include "starboard/client_porting/poem/assert_poem.h"
	#include "starboard/client_porting/poem/string_poem.h"
	#endif // defined(STARBOARD)
	#include "unicode/ucharstrie.h"
	#include "unicode/uniset.h"
	#include "unicode/unistr.h"
	#include "unicode/ustringtrie.h"
	#include "collation.h"
	#include "collationdata.h"
	#include "collationsets.h"
	#include "normalizer2impl.h"
	#include "uassert.h"
	#include "utf16collationiterator.h"
	#include "utrie2.h"

	U_NAMESPACE_BEGIN

	U_CDECL_BEGIN

	/**
	 * Range callback handed to utrie2_enum() by TailoredSet::forData().
	 * Ranges whose value is the fallback CE32 are skipped (TRUE is returned);
	 * every other range is forwarded to TailoredSet::handleCE32().
	 */
	static UBool U_CALLCONV
	enumTailoredRange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) {
	    if(ce32 == Collation::FALLBACK_CE32) {
	        return TRUE; // fallback to base, not tailored
	    }
	    // The context is the TailoredSet passed to utrie2_enum(); it must be
	    // a pointer, otherwise the "->" call below cannot compile.
	    TailoredSet *ts = (TailoredSet *)context;
	    return ts->handleCE32(start, end, ce32);
	}

	U_CDECL_END

	void
	TailoredSet::forData(const CollationData *d, UErrorCode &ec) {
	if(U_FAILURE(ec)) { return; }
	errorCode = ec; // Preserve info & warning codes.
	data = d;
	baseData = d->base;
	U_ASSERT(baseData != NULL);
	utrie2_enum(data->trie, NULL, enumTailoredRange, this);
	ec = errorCode;
	}

	/**
	 * Compares the tailoring mapping ce32, shared by all code points in
	 * [start, end], with the base mapping of each of those code points.
	 * @return U_SUCCESS(errorCode), so that the trie enumeration stops after a failure.
	 */
	UBool
	TailoredSet::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {
	    U_ASSERT(ce32 != Collation::FALLBACK_CE32);
	    if(Collation::isSpecialCE32(ce32)) {
	        ce32 = data->getIndirectCE32(ce32);
	        if(ce32 == Collation::FALLBACK_CE32) {
	            return U_SUCCESS(errorCode);
	        }
	    }
	    for(UChar32 c = start;; ++c) {
	        const uint32_t baseCE32 = baseData->getFinalCE32(baseData->getCE32(c));
	        // Equal CE32 values are not sufficient for "not tailored":
	        // contractions and expansions in different data objects
	        // normally differ even if they have the same data offsets.
	        if(!Collation::isSelfContainedCE32(ce32) || !Collation::isSelfContainedCE32(baseCE32)) {
	            compare(c, ce32, baseCE32);
	        } else if(ce32 != baseCE32) {
	            // Fast path: both values are self-contained and they differ.
	            tailored->add(c);
	        }
	        if(c >= end) { break; }
	    }
	    return U_SUCCESS(errorCode);
	}

	/**
	 * Compares the tailoring mapping ce32 of code point c with the base mapping
	 * baseCE32 and records differences in the tailored set.
	 *
	 * Prefix data is processed first, then contraction data, and finally the
	 * remaining non-contextual values are compared by tag and, for expansions,
	 * element by element.
	 *
	 * @param c        the code point whose mappings are compared
	 * @param ce32     the mapping from the tailoring data
	 * @param baseCE32 the mapping from the base data
	 */
	void
	TailoredSet::compare(UChar32 c, uint32_t ce32, uint32_t baseCE32) {
	    // Prefixes: each CE32 is replaced by the default value stored
	    // at the start of its context data.
	    if(Collation::isPrefixCE32(ce32)) {
	        const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
	        ce32 = data->getFinalCE32(CollationData::readCE32(p));
	        if(Collation::isPrefixCE32(baseCE32)) {
	            const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
	            baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
	            comparePrefixes(c, p + 2, q + 2);
	        } else {
	            addPrefixes(data, c, p + 2);
	        }
	    } else if(Collation::isPrefixCE32(baseCE32)) {
	        const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
	        baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
	        addPrefixes(baseData, c, q + 2);
	    }

	    // Contractions, handled analogously to the prefixes above.
	    if(Collation::isContractionCE32(ce32)) {
	        const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
	        if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
	            ce32 = Collation::NO_CE32;
	        } else {
	            ce32 = data->getFinalCE32(CollationData::readCE32(p));
	        }
	        if(Collation::isContractionCE32(baseCE32)) {
	            const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
	            if((baseCE32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
	                baseCE32 = Collation::NO_CE32;
	            } else {
	                baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
	            }
	            compareContractions(c, p + 2, q + 2);
	        } else {
	            addContractions(c, p + 2);
	        }
	    } else if(Collation::isContractionCE32(baseCE32)) {
	        const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
	        baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
	        addContractions(c, q + 2);
	    }

	    // Determine the tags of the remaining values; -1 means "not special".
	    int32_t tag;
	    if(Collation::isSpecialCE32(ce32)) {
	        tag = Collation::tagFromCE32(ce32);
	        U_ASSERT(tag != Collation::PREFIX_TAG);
	        U_ASSERT(tag != Collation::CONTRACTION_TAG);
	        // Currently, the tailoring data builder does not write offset tags.
	        // They might be useful for saving space,
	        // but they would complicate the builder,
	        // and in tailorings we assume that performance of tailored characters is more important.
	        U_ASSERT(tag != Collation::OFFSET_TAG);
	    } else {
	        tag = -1;
	    }
	    int32_t baseTag;
	    if(Collation::isSpecialCE32(baseCE32)) {
	        baseTag = Collation::tagFromCE32(baseCE32);
	        U_ASSERT(baseTag != Collation::PREFIX_TAG);
	        U_ASSERT(baseTag != Collation::CONTRACTION_TAG);
	    } else {
	        baseTag = -1;
	    }

	    // Non-contextual mappings, expansions, etc.
	    if(baseTag == Collation::OFFSET_TAG) {
	        // We might be comparing a tailoring CE which is a copy of
	        // a base offset-tag CE, via the [optimize [set]] syntax
	        // or when a single-character mapping was copied for tailored contractions.
	        // Offset tags always result in long-primary CEs,
	        // with common secondary/tertiary weights.
	        if(!Collation::isLongPrimaryCE32(ce32)) {
	            add(c);
	            return;
	        }
	        int64_t dataCE = baseData->ces[Collation::indexFromCE32(baseCE32)];
	        uint32_t p = Collation::getThreeBytePrimaryForOffsetData(c, dataCE);
	        if(Collation::primaryFromLongPrimaryCE32(ce32) != p) {
	            add(c);
	            return;
	        }
	    }

	    // Different kinds of mappings always count as a difference.
	    if(tag != baseTag) {
	        add(c);
	        return;
	    }

	    if(tag == Collation::EXPANSION32_TAG) {
	        const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32);
	        int32_t length = Collation::lengthFromCE32(ce32);

	        const uint32_t *baseCE32s = baseData->ce32s + Collation::indexFromCE32(baseCE32);
	        int32_t baseLength = Collation::lengthFromCE32(baseCE32);

	        if(length != baseLength) {
	            add(c);
	            return;
	        }
	        for(int32_t i = 0; i < length; ++i) {
	            if(ce32s[i] != baseCE32s[i]) {
	                add(c);
	                break;
	            }
	        }
	    } else if(tag == Collation::EXPANSION_TAG) {
	        const int64_t *ces = data->ces + Collation::indexFromCE32(ce32);
	        int32_t length = Collation::lengthFromCE32(ce32);

	        const int64_t *baseCEs = baseData->ces + Collation::indexFromCE32(baseCE32);
	        int32_t baseLength = Collation::lengthFromCE32(baseCE32);

	        if(length != baseLength) {
	            add(c);
	            return;
	        }
	        for(int32_t i = 0; i < length; ++i) {
	            if(ces[i] != baseCEs[i]) {
	                add(c);
	                break;
	            }
	        }
	    } else if(tag == Collation::HANGUL_TAG) {
	        // A Hangul syllable is added if any of its decomposed jamos
	        // is already in the tailored set.
	        UChar jamos[3];
	        int32_t length = Hangul::decompose(c, jamos);
	        if(tailored->contains(jamos[0]) || tailored->contains(jamos[1]) ||
	                (length == 3 && tailored->contains(jamos[2]))) {
	            add(c);
	        }
	    } else if(ce32 != baseCE32) {
	        add(c);
	    }
	}

	/**
	 * Walks the prefix tries of the tailoring (p) and of the base (q) in parallel.
	 * A prefix found on only one side is added via addPrefix(); for a prefix found
	 * on both sides the associated values are compared via compare().
	 *
	 * @param c the code point the prefixes belong to
	 * @param p start of the tailoring's prefix trie data
	 * @param q start of the base's prefix trie data
	 */
	void
	TailoredSet::comparePrefixes(UChar32 c, const UChar *p, const UChar *q) {
	    // Parallel iteration over prefixes of both tables.
	    UCharsTrie::Iterator prefixes(p, 0, errorCode);
	    UCharsTrie::Iterator basePrefixes(q, 0, errorCode);
	    const UnicodeString *tp = NULL; // Tailoring prefix.
	    const UnicodeString *bp = NULL; // Base prefix.
	    // Use a string with a U+FFFF as the limit sentinel.
	    // U+FFFF is untailorable and will not occur in prefixes.
	    UnicodeString none((UChar)0xffff);
	    for(;;) {
	        // Refill whichever side was consumed in the previous round.
	        if(tp == NULL) {
	            if(prefixes.next(errorCode)) {
	                tp = &prefixes.getString();
	            } else {
	                tp = &none;
	            }
	        }
	        if(bp == NULL) {
	            if(basePrefixes.next(errorCode)) {
	                bp = &basePrefixes.getString();
	            } else {
	                bp = &none;
	            }
	        }
	        if(tp == &none && bp == &none) { break; }
	        int32_t cmp = tp->compare(*bp);
	        if(cmp < 0) {
	            // tp occurs in the tailoring but not in the base.
	            addPrefix(data, *tp, c, (uint32_t)prefixes.getValue());
	            tp = NULL;
	        } else if(cmp > 0) {
	            // bp occurs in the base but not in the tailoring.
	            addPrefix(baseData, *bp, c, (uint32_t)basePrefixes.getValue());
	            bp = NULL;
	        } else {
	            // The prefix exists on both sides: compare its mappings in context.
	            setPrefix(*tp);
	            compare(c, (uint32_t)prefixes.getValue(), (uint32_t)basePrefixes.getValue());
	            resetPrefix();
	            tp = NULL;
	            bp = NULL;
	        }
	    }
	}

	/**
	 * Walks the contraction-suffix tries of the tailoring (p) and of the base (q)
	 * in parallel. A suffix found on only one side is added via addSuffix(); for a
	 * suffix found on both sides the associated values are compared via compare().
	 *
	 * @param c the code point the suffixes belong to
	 * @param p start of the tailoring's suffix trie data
	 * @param q start of the base's suffix trie data
	 */
	void
	TailoredSet::compareContractions(UChar32 c, const UChar *p, const UChar *q) {
	    // Parallel iteration over suffixes of both tables.
	    UCharsTrie::Iterator suffixes(p, 0, errorCode);
	    UCharsTrie::Iterator baseSuffixes(q, 0, errorCode);
	    const UnicodeString *ts = NULL; // Tailoring suffix.
	    const UnicodeString *bs = NULL; // Base suffix.
	    // Use a string with two U+FFFF as the limit sentinel.
	    // U+FFFF is untailorable and will not occur in contractions except maybe
	    // as a single suffix character for a root-collator boundary contraction.
	    UnicodeString none((UChar)0xffff);
	    none.append((UChar)0xffff);
	    for(;;) {
	        // Refill whichever side was consumed in the previous round.
	        if(ts == NULL) {
	            if(suffixes.next(errorCode)) {
	                ts = &suffixes.getString();
	            } else {
	                ts = &none;
	            }
	        }
	        if(bs == NULL) {
	            if(baseSuffixes.next(errorCode)) {
	                bs = &baseSuffixes.getString();
	            } else {
	                bs = &none;
	            }
	        }
	        if(ts == &none && bs == &none) { break; }
	        int32_t cmp = ts->compare(*bs);
	        if(cmp < 0) {
	            // ts occurs in the tailoring but not in the base.
	            addSuffix(c, *ts);
	            ts = NULL;
	        } else if(cmp > 0) {
	            // bs occurs in the base but not in the tailoring.
	            addSuffix(c, *bs);
	            bs = NULL;
	        } else {
	            // The suffix exists on both sides: compare its mappings in context.
	            suffix = ts;
	            compare(c, (uint32_t)suffixes.getValue(), (uint32_t)baseSuffixes.getValue());
	            suffix = NULL;
	            ts = NULL;
	            bs = NULL;
	        }
	    }
	}

	/**
	 * Adds every prefix stored in the trie at p, which belongs to the data object d,
	 * by calling addPrefix() with the prefix string and its trie value.
	 *
	 * @param d the data object the prefix trie belongs to
	 * @param c the code point the prefixes belong to
	 * @param p start of the prefix trie data
	 */
	void
	TailoredSet::addPrefixes(const CollationData *d, UChar32 c, const UChar *p) {
	    UCharsTrie::Iterator prefixes(p, 0, errorCode);
	    while(prefixes.next(errorCode)) {
	        addPrefix(d, prefixes.getString(), c, (uint32_t)prefixes.getValue());
	    }
	}

	void
	TailoredSet::addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32) {
	setPrefix(pfx);
	ce32 = d->getFinalCE32(ce32);
	if(Collation::isContractionCE32(ce32)) {
	const UChar *p = d->contexts + Collation::indexFromCE32(ce32);
	addContractions(c, p + 2);
	}
	tailored->add(UnicodeString(unreversedPrefix).append(c));
	resetPrefix();
	}

	// Adds one string per suffix found in the suffix trie at p:
	// each suffix is handed to addSuffix() together with the code point c.
	void
	TailoredSet::addContractions(UChar32 c, const UChar *p) {
	    for(UCharsTrie::Iterator iter(p, 0, errorCode); iter.next(errorCode);) {
	        addSuffix(c, iter.getString());
	    }
	}

	// Adds the string "current prefix + c + sfx" to the tailored set.
	void
	TailoredSet::addSuffix(UChar32 c, const UnicodeString &sfx) {
	    UnicodeString contraction(unreversedPrefix);
	    contraction.append(c);
	    contraction.append(sfx);
	    tailored->add(contraction);
	}

	// Adds c to the tailored set.
	// Without an active prefix or suffix the bare code point is added;
	// otherwise the string "prefix + c + suffix" is added, using whichever
	// of the two context parts is currently set.
	void
	TailoredSet::add(UChar32 c) {
	    if(unreversedPrefix.isEmpty() && suffix == NULL) {
	        // No context: a plain code point.
	        tailored->add(c);
	        return;
	    }
	    UnicodeString withContext(unreversedPrefix);
	    withContext.append(c);
	    if(suffix != NULL) {
	        withContext.append(*suffix);
	    }
	    tailored->add(withContext);
	}

	// Out-of-line definition of the CESink destructor; the body is empty.
	ContractionsAndExpansions::CESink::~CESink() {}

	U_CDECL_BEGIN

	// Range callback passed to utrie2_enum() by ContractionsAndExpansions::forData().
	// context is the ContractionsAndExpansions object driving the enumeration;
	// [start, end] is a range of code points that share the value ce32.
	// The behavior depends on cne->checkTailored:
	//   == 0: no tailoring; every range is handled.
	//    < 0: enumerating tailoring data; fallback ranges are skipped and all
	//         other ranges are recorded in cne->tailored before being handled.
	//    > 0: enumerating base data; code points contained in cne->tailored
	//         are excluded before handling.
	// Returns TRUE for skipped ranges; otherwise returns whether
	// cne->errorCode is still a success code after handling.
	static UBool U_CALLCONV
	enumCnERange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) {
	    ContractionsAndExpansions *cne = (ContractionsAndExpansions *)context;
	    if(cne->checkTailored == 0) {
	        // There is no tailoring.
	        // No need to collect nor check the tailored set.
	    } else if(cne->checkTailored < 0) {
	        // Collect the set of code points with mappings in the tailoring data.
	        if(ce32 == Collation::FALLBACK_CE32) {
	            return TRUE;  // fallback to base, not tailored
	        } else {
	            cne->tailored.add(start, end);
	        }
	    // checkTailored > 0: Exclude tailored ranges from the base data enumeration.
	    } else if(start == end) {
	        if(cne->tailored.contains(start)) {
	            return TRUE;
	        }
	    } else if(cne->tailored.containsSome(start, end)) {
	        // Part of the range is tailored: handle only the untailored sub-ranges.
	        cne->ranges.set(start, end).removeAll(cne->tailored);
	        int32_t count = cne->ranges.getRangeCount();
	        for(int32_t i = 0; i < count; ++i) {
	            cne->handleCE32(cne->ranges.getRangeStart(i), cne->ranges.getRangeEnd(i), ce32);
	        }
	        return U_SUCCESS(cne->errorCode);
	    }
	    cne->handleCE32(start, end, ce32);
	    return U_SUCCESS(cne->errorCode);
	}

	U_CDECL_END

	// Enumerates all mappings in the collation data d.
	// If d has base data (d->base != NULL), d is treated as a tailoring:
	// its trie is enumerated first while collecting the tailored code points,
	// then the base trie is enumerated for the code points that are not tailored.
	// Does nothing if ec already holds a failure; on return ec receives
	// the error code accumulated during the enumeration.
	void
	ContractionsAndExpansions::forData(const CollationData *d, UErrorCode &ec) {
	    if(U_FAILURE(ec)) { return; }
	    errorCode = ec;  // Preserve info & warning codes.
	    // Add all from the data, can be tailoring or base.
	    if(d->base != NULL) {
	        checkTailored = -1;
	    }
	    data = d;
	    utrie2_enum(data->trie, NULL, enumCnERange, this);
	    if(d->base == NULL || U_FAILURE(errorCode)) {
	        ec = errorCode;
	        return;
	    }
	    // Add all from the base data but only for un-tailored code points.
	    tailored.freeze();
	    checkTailored = 1;
	    data = d->base;
	    utrie2_enum(data->trie, NULL, enumCnERange, this);
	    ec = errorCode;
	}

	void
	ContractionsAndExpansions::forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec) {
	if(U_FAILURE(ec)) { return; }
	errorCode = ec; // Preserve info & warning codes.
	uint32_t ce32 = d->getCE32(c);
	if(ce32 == Collation::FALLBACK_CE32) {
	d = d->base;
	ce32 = d->getCE32(c);
	}
	data = d;
	handleCE32(c, c, ce32);
	ec = errorCode;
	}

	// Processes the CE32 that maps the code point range [start, end].
	// - Non-special CE32s and the long-primary/long-secondary forms are turned
	//   into a single CE that is passed to the sink, if there is one.
	// - Expansion-type CE32s (Latin, 32-bit, 64-bit, Hangul) are reported via
	//   sink->handleExpansion(); when no prefix is currently set, the range is
	//   also recorded through addExpansions().
	// - Prefix and contraction CE32s are delegated to handlePrefixes() and
	//   handleContractions().
	// - Digit and U+0000 CE32s are replaced with another CE32 read from
	//   data->ce32s, and the loop runs again on that value.
	// - Reserved, builder-data and lead-surrogate tags set errorCode to
	//   U_INTERNAL_PROGRAM_ERROR unless errorCode already holds a failure.
	// - Fallback, offset and implicit tags are ignored.
	void
	ContractionsAndExpansions::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {
	// Loop because some tags only redirect to another CE32 (see DIGIT_TAG, U0000_TAG).
	for(;;) {
	if((ce32 & 0xff) < Collation::SPECIAL_CE32_LOW_BYTE) {
	// !isSpecialCE32()
	if(sink != NULL) {
	sink->handleCE(Collation::ceFromSimpleCE32(ce32));
	}
	return;
	}
	switch(Collation::tagFromCE32(ce32)) {
	case Collation::FALLBACK_TAG:
	return;
	case Collation::RESERVED_TAG_3:
	case Collation::BUILDER_DATA_TAG:
	case Collation::LEAD_SURROGATE_TAG:
	// Record an internal error, but do not overwrite an earlier failure.
	if(U_SUCCESS(errorCode)) { errorCode = U_INTERNAL_PROGRAM_ERROR; }
	return;
	case Collation::LONG_PRIMARY_TAG:
	if(sink != NULL) {
	sink->handleCE(Collation::ceFromLongPrimaryCE32(ce32));
	}
	return;
	case Collation::LONG_SECONDARY_TAG:
	if(sink != NULL) {
	sink->handleCE(Collation::ceFromLongSecondaryCE32(ce32));
	}
	return;
	case Collation::LATIN_EXPANSION_TAG:
	if(sink != NULL) {
	// The CE32 itself encodes two CEs.
	ces[0] = Collation::latinCE0FromCE32(ce32);
	ces[1] = Collation::latinCE1FromCE32(ce32);
	sink->handleExpansion(ces, 2);
	}
	// Optimization: If we have a prefix,
	// then the relevant strings have been added already.
	if(unreversedPrefix.isEmpty()) {
	addExpansions(start, end);
	}
	return;
	case Collation::EXPANSION32_TAG:
	if(sink != NULL) {
	// Convert each stored CE32 of the expansion into a CE before reporting.
	const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32);
	int32_t length = Collation::lengthFromCE32(ce32);
	for(int32_t i = 0; i < length; ++i) {
	ces[i] = Collation::ceFromCE32(*ce32s++);
	}
	sink->handleExpansion(ces, length);
	}
	// Optimization: If we have a prefix,
	// then the relevant strings have been added already.
	if(unreversedPrefix.isEmpty()) {
	addExpansions(start, end);
	}
	return;
	case Collation::EXPANSION_TAG:
	if(sink != NULL) {
	// The CEs are read directly from data->ces; no conversion is needed.
	int32_t length = Collation::lengthFromCE32(ce32);
	sink->handleExpansion(data->ces + Collation::indexFromCE32(ce32), length);
	}
	// Optimization: If we have a prefix,
	// then the relevant strings have been added already.
	if(unreversedPrefix.isEmpty()) {
	addExpansions(start, end);
	}
	return;
	case Collation::PREFIX_TAG:
	handlePrefixes(start, end, ce32);
	return;
	case Collation::CONTRACTION_TAG:
	handleContractions(start, end, ce32);
	return;
	case Collation::DIGIT_TAG:
	// Fetch the non-numeric-collation CE32 and continue.
	ce32 = data->ce32s[Collation::indexFromCE32(ce32)];
	// This break only leaves the switch; the enclosing loop re-examines the new ce32.
	break;
	case Collation::U0000_TAG:
	U_ASSERT(start == 0 && end == 0);
	// Fetch the normal ce32 for U+0000 and continue.
	ce32 = data->ce32s[0];
	// This break only leaves the switch; the enclosing loop re-examines the new ce32.
	break;
	case Collation::HANGUL_TAG:
	if(sink != NULL) {
	// TODO: This should be optimized,
	// especially if [start..end] is the complete Hangul range. (assert that)
	// Fetch the CEs for each code point in the range via a collation iterator
	// over a one-code-unit text buffer.
	UTF16CollationIterator iter(data, FALSE, NULL, NULL, NULL);
	UChar hangul[1] = { 0 };
	for(UChar32 c = start; c <= end; ++c) {
	hangul[0] = (UChar)c;
	iter.setText(hangul, hangul + 1);
	int32_t length = iter.fetchCEs(errorCode);
	if(U_FAILURE(errorCode)) { return; }
	// Ignore the terminating non-CE.
	U_ASSERT(length >= 2 && iter.getCE(length - 1) == Collation::NO_CE);
	sink->handleExpansion(iter.getCEs(), length - 1);
	}
	}
	// Optimization: If we have a prefix,
	// then the relevant strings have been added already.
	if(unreversedPrefix.isEmpty()) {
	addExpansions(start, end);
	}
	return;
	case Collation::OFFSET_TAG:
	// Currently no need to send offset CEs to the sink.
	return;
	case Collation::IMPLICIT_TAG:
	// Currently no need to send implicit CEs to the sink.
	return;
	}
	}
	}

	// Handles a prefix-tagged CE32 for the range [start, end].
	// The default CE32 (used when no prefix matches) is handled first.
	// If prefixes are to be added, every prefix in the prefix trie is then
	// set as the current prefix, the resulting strings are added to both
	// the contractions and the expansions sets, and the CE32 mapped to
	// that prefix is handled.
	void
	ContractionsAndExpansions::handlePrefixes(
	        UChar32 start, UChar32 end, uint32_t ce32) {
	    const UChar *contextEntry = data->contexts + Collation::indexFromCE32(ce32);
	    // Default mapping if no prefix matches.
	    const uint32_t defaultCE32 = CollationData::readCE32(contextEntry);
	    handleCE32(start, end, defaultCE32);
	    if(addPrefixes) {
	        // The prefix trie starts two code units into the context entry.
	        for(UCharsTrie::Iterator iter(contextEntry + 2, 0, errorCode); iter.next(errorCode);) {
	            setPrefix(iter.getString());
	            // Prefix/pre-context mappings are special kinds of contractions
	            // that always yield expansions.
	            addStrings(start, end, contractions);
	            addStrings(start, end, expansions);
	            handleCE32(start, end, (uint32_t)iter.getValue());
	        }
	        resetPrefix();
	    }
	}

	// Handles a contraction-tagged CE32 for the range [start, end].
	// Unless the CE32 is flagged as having no match on the single code point,
	// the default CE32 (used when no suffix matches) is handled first.
	// Then, for every suffix in the suffix trie, the resulting strings are
	// added to the contractions set (and to the expansions set while a prefix
	// is active), and the CE32 mapped to that suffix is handled.
	void
	ContractionsAndExpansions::handleContractions(
	        UChar32 start, UChar32 end, uint32_t ce32) {
	    const UChar *contextEntry = data->contexts + Collation::indexFromCE32(ce32);
	    if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) == 0) {
	        // Default mapping if no suffix matches.
	        const uint32_t defaultCE32 = CollationData::readCE32(contextEntry);
	        U_ASSERT(!Collation::isContractionCE32(defaultCE32));
	        handleCE32(start, end, defaultCE32);
	    } else {
	        // No match on the single code point.
	        // We are underneath a prefix, and the default mapping is just
	        // a fallback to the mappings for a shorter prefix.
	        U_ASSERT(!unreversedPrefix.isEmpty());
	    }
	    // The suffix trie starts two code units into the context entry.
	    for(UCharsTrie::Iterator iter(contextEntry + 2, 0, errorCode); iter.next(errorCode);) {
	        suffix = &iter.getString();
	        addStrings(start, end, contractions);
	        if(!unreversedPrefix.isEmpty()) {
	            addStrings(start, end, expansions);
	        }
	        handleCE32(start, end, (uint32_t)iter.getValue());
	    }
	    suffix = NULL;
	}

	// Records [start, end] as having expansions.
	// With an active prefix or suffix, context strings are added through
	// addStrings(); otherwise the plain code point range is added to the
	// expansions set, if one was supplied.
	void
	ContractionsAndExpansions::addExpansions(UChar32 start, UChar32 end) {
	    if(!unreversedPrefix.isEmpty() || suffix != NULL) {
	        addStrings(start, end, expansions);
	        return;
	    }
	    if(expansions != NULL) {
	        expansions->add(start, end);
	    }
	}

	// Adds to set one string per code point, from start up to and including end:
	// "current prefix + code point + current suffix" (the suffix only if one is set).
	// Does nothing if set is NULL. The string for start is always added,
	// even if start is greater than end.
	void
	ContractionsAndExpansions::addStrings(UChar32 start, UChar32 end, UnicodeSet *set) {
	    if(set == NULL) { return; }
	    const int32_t prefixLength = unreversedPrefix.length();
	    UnicodeString buffer(unreversedPrefix);
	    UChar32 cp = start;
	    for(;;) {
	        buffer.append(cp);
	        if(suffix != NULL) {
	            buffer.append(*suffix);
	        }
	        set->add(buffer);
	        // Cut back to just the prefix for the next code point.
	        buffer.truncate(prefixLength);
	        if(++cp > end) { break; }
	    }
	}

	U_NAMESPACE_END

	#endif // !UCONFIG_NO_COLLATION