// src/third_party/icu/source/common/filterednormalizer2.cpp - cobalt - Git at Google

 /*
 *******************************************************************************
 *
 *   Copyright (C) 2009-2012, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  filterednormalizer2.cpp
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2009dec10
 *   created by: Markus W. Scherer
 */

 #include "unicode/utypes.h"

 #if !UCONFIG_NO_NORMALIZATION

 #include "starboard/client_porting/poem/string_poem.h"
 #include "unicode/normalizer2.h"
 #include "unicode/uniset.h"
 #include "unicode/unistr.h"
 #include "unicode/unorm.h"
 #include "cpputils.h"

 U_NAMESPACE_BEGIN

 // Out-of-line destructor; its body is intentionally empty.
 FilteredNormalizer2::~FilteredNormalizer2() {
 }

 // Public entry point: replaces the contents of dest with the filtered
 // normalization of src and returns dest.
 // - If errorCode indicates failure after uprv_checkCanGetBuffer() (defined
 //   elsewhere) has looked at src, dest is set to bogus.
 // - src and dest must be distinct objects; otherwise errorCode becomes
 //   U_ILLEGAL_ARGUMENT_ERROR and dest is returned without being modified.
 UnicodeString &
 FilteredNormalizer2::normalize(const UnicodeString &src,
                                UnicodeString &dest,
                                UErrorCode &errorCode) const {
     uprv_checkCanGetBuffer(src, errorCode);
     if(U_FAILURE(errorCode)) {
         dest.setToBogus();
     } else if(&src==&dest) {
         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     } else {
         // Empty dest first: the four-argument overload only appends.
         dest.remove();
         normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
     }
     return dest;
 }

 // Internal worker: performs no argument checking and appends to dest
 // (it never clears it). Returns dest.
 // spanCondition should be the condition that is likely to produce a
 // non-empty span at the start of src:
 // - USET_SPAN_SIMPLE for the start of a string (for set=[:age=3.2:], almost
 //   all common characters were already in Unicode 3.2);
 // - USET_SPAN_NOT_CONTAINED when continuing after an in-filter prefix.
 // The loop alternates between the two kinds of runs until src is consumed
 // or norm2.normalize() reports a failure.
 UnicodeString &
 FilteredNormalizer2::normalize(const UnicodeString &src,
                                UnicodeString &dest,
                                USetSpanCondition spanCondition,
                                UErrorCode &errorCode) const {
     // Declared outside the loop so the temporary destination's buffer is
     // kept between iterations.
     UnicodeString scratch;
     int32_t start=0;
     while(start<src.length()) {
         const int32_t limit=set.span(src, start, spanCondition);
         const int32_t runLength=limit-start;
         if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
             // In-filter run: normalize it on its own and append the result.
             if(runLength!=0) {
                 // norm2.normalizeSecondAndAppend() is deliberately not used:
                 // it could modify the non-filter part already in dest.
                 dest.append(norm2.normalize(src.tempSubStringBetween(start, limit),
                                             scratch, errorCode));
                 if(U_FAILURE(errorCode)) {
                     break;
                 }
             }
             spanCondition=USET_SPAN_NOT_CONTAINED;
         } else {
             // Out-of-filter run: copy it through unchanged.
             if(runLength!=0) {
                 dest.append(src, start, runLength);
             }
             spanCondition=USET_SPAN_SIMPLE;
         }
         start=limit;
     }
     return dest;
 }

 // Public three-argument form: forwards to the four-argument overload with
 // doNormalize set to TRUE and returns its result.
 UnicodeString &
 FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
                                               const UnicodeString &second,
                                               UErrorCode &errorCode) const {
     const UBool doNormalize=TRUE;
     return normalizeSecondAndAppend(first, second, doNormalize, errorCode);
 }

 // Forwards to the four-argument normalizeSecondAndAppend() overload with
 // doNormalize set to FALSE and returns its result.
 UnicodeString &
 FilteredNormalizer2::append(UnicodeString &first,
                             const UnicodeString &second,
                             UErrorCode &errorCode) const {
     const UBool doNormalize=FALSE;
     return normalizeSecondAndAppend(first, second, doNormalize, errorCode);
 }

 // Shared implementation behind the public normalizeSecondAndAppend() and
 // append(). Appends second to first and returns first.
 // doNormalize selects, for the in-filter prefix of second, between
 // norm2.normalizeSecondAndAppend() (TRUE) and norm2.append() (FALSE), and
 // for the remainder of second between the filtered normalize() (TRUE) and a
 // plain UnicodeString append (FALSE).
 // first and second must be distinct objects; otherwise errorCode is set to
 // U_ILLEGAL_ARGUMENT_ERROR and first is returned unchanged.
 UnicodeString &
 FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
                                               const UnicodeString &second,
                                               UBool doNormalize,
                                               UErrorCode &errorCode) const {
     uprv_checkCanGetBuffer(first, errorCode);
     uprv_checkCanGetBuffer(second, errorCode);
     if(U_FAILURE(errorCode)) {
         return first;
     }
     if(&first==&second) {
         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
         return first;
     }
     // Nothing in first to merge with: the result comes from second alone.
     if(first.isEmpty()) {
         if(doNormalize) {
             return normalize(second, first, errorCode);
         } else {
             return first=second;
         }
     }
     // merge the in-filter suffix of the first string with the in-filter prefix of the second
     int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);
     if(prefixLimit!=0) {
         UnicodeString prefix(second.tempSubString(0, prefixLimit));
         int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
         if(suffixStart==0) {
             // The in-filter suffix starts at index 0, so norm2 can work on
             // first directly.
             if(doNormalize) {
                 norm2.normalizeSecondAndAppend(first, prefix, errorCode);
             } else {
                 norm2.append(first, prefix, errorCode);
             }
         } else {
             // Only the tail of first is in-filter: merge into a copy of that
             // tail and write it back, so the part of first before
             // suffixStart is not handed to norm2.
             UnicodeString middle(first, suffixStart, INT32_MAX);
             if(doNormalize) {
                 norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
             } else {
                 norm2.append(middle, prefix, errorCode);
             }
             first.replace(suffixStart, INT32_MAX, middle);
         }
     }
     // Whatever follows the in-filter prefix of second. Since we continue
     // after an in-filter prefix, the scan resumes with
     // USET_SPAN_NOT_CONTAINED.
     // NOTE(review): errorCode from the prefix merge above is not checked
     // before this step runs.
     if(prefixLimit<second.length()) {
         UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));
         if(doNormalize) {
             normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);
         } else {
             first.append(rest);
         }
     }
     return first;
 }

 // Returns 0 without consulting norm2 when c is not in the filter set;
 // otherwise returns 1 or 0 according to whether norm2.getDecomposition()
 // returned nonzero. decomposition is only ever passed on to norm2.
 UBool
 FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {
     UBool found=set.contains(c);
     if(found) {
         found=norm2.getDecomposition(c, decomposition)!=0;
     }
     return found;
 }

 // Returns 0 without consulting norm2 when c is not in the filter set;
 // otherwise returns 1 or 0 according to whether norm2.getRawDecomposition()
 // returned nonzero. decomposition is only ever passed on to norm2.
 UBool
 FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
     UBool found=set.contains(c);
     if(found) {
         found=norm2.getRawDecomposition(c, decomposition)!=0;
     }
     return found;
 }

 // Returns norm2.composePair(a, b) only when both a and b are in the filter
 // set; otherwise returns U_SENTINEL. b is not tested if a is already
 // outside the set.
 UChar32
 FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
     if(!set.contains(a) || !set.contains(b)) {
         return U_SENTINEL;
     }
     return norm2.composePair(a, b);
 }

 // Returns norm2's combining class for c when c is in the filter set, and 0
 // for any code point outside it.
 uint8_t
 FilteredNormalizer2::getCombiningClass(UChar32 c) const {
     if(!set.contains(c)) {
         return 0;
     }
     return norm2.getCombiningClass(c);
 }

 // Returns TRUE when every in-filter run of s passes norm2.isNormalized().
 // Out-of-filter runs are skipped without being examined.
 // Returns FALSE if errorCode indicates failure on entry or after
 // uprv_checkCanGetBuffer(), if any in-filter run is reported as not
 // normalized, or if norm2 sets a failure code.
 UBool
 FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
     uprv_checkCanGetBuffer(s, errorCode);
     if(U_FAILURE(errorCode)) {
         return FALSE;
     }
     // The scan alternates between in-filter runs (USET_SPAN_SIMPLE) and
     // out-of-filter runs (USET_SPAN_NOT_CONTAINED), beginning in-filter.
     UBool inFilter=TRUE;
     int32_t start=0;
     while(start<s.length()) {
         const int32_t limit=
             set.span(s, start, inFilter ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED);
         if(inFilter) {
             const UBool runIsNormalized=
                 norm2.isNormalized(s.tempSubStringBetween(start, limit), errorCode);
             if(!runIsNormalized || U_FAILURE(errorCode)) {
                 return FALSE;
             }
         }
         inFilter=!inFilter;
         start=limit;
     }
     return TRUE;
 }

 // Combines norm2.quickCheck() over the in-filter runs of s; out-of-filter
 // runs are skipped.
 // - Returns UNORM_MAYBE if errorCode indicates failure on entry or after
 //   uprv_checkCanGetBuffer().
 // - Returns a run's result immediately if that run yields UNORM_NO or norm2
 //   sets a failure code.
 // - Otherwise returns UNORM_MAYBE if any run yielded UNORM_MAYBE, else
 //   UNORM_YES.
 UNormalizationCheckResult
 FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
     uprv_checkCanGetBuffer(s, errorCode);
     if(U_FAILURE(errorCode)) {
         return UNORM_MAYBE;
     }
     UNormalizationCheckResult overall=UNORM_YES;
     // Alternate between in-filter and out-of-filter runs, beginning in-filter.
     UBool inFilter=TRUE;
     int32_t start=0;
     while(start<s.length()) {
         const int32_t limit=
             set.span(s, start, inFilter ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED);
         if(inFilter) {
             const UNormalizationCheckResult runResult=
                 norm2.quickCheck(s.tempSubStringBetween(start, limit), errorCode);
             if(U_FAILURE(errorCode) || runResult==UNORM_NO) {
                 return runResult;
             }
             if(runResult==UNORM_MAYBE) {
                 overall=UNORM_MAYBE;
             }
         }
         inFilter=!inFilter;
         start=limit;
     }
     return overall;
 }

 // Returns an index into s: the scan walks the runs of s and stops at the
 // first in-filter run for which norm2.spanQuickCheckYes() covers less than
 // the whole run (or for which norm2 sets a failure code), returning the run
 // start plus the length norm2 reported. Out-of-filter runs are skipped.
 // Returns 0 if errorCode indicates failure on entry or after
 // uprv_checkCanGetBuffer(), and s.length() if no run stops the scan.
 int32_t
 FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
     uprv_checkCanGetBuffer(s, errorCode);
     if(U_FAILURE(errorCode)) {
         return 0;
     }
     // Alternate between in-filter and out-of-filter runs, beginning in-filter.
     UBool inFilter=TRUE;
     int32_t start=0;
     while(start<s.length()) {
         const int32_t limit=
             set.span(s, start, inFilter ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED);
         if(inFilter) {
             const int32_t runYesLength=
                 norm2.spanQuickCheckYes(s.tempSubStringBetween(start, limit), errorCode);
             // Convert the run-relative length to an index into s.
             const int32_t yesLimit=start+runYesLength;
             if(U_FAILURE(errorCode) || yesLimit<limit) {
                 return yesLimit;
             }
         }
         inFilter=!inFilter;
         start=limit;
     }
     return s.length();
 }

 // Returns 1 for any code point outside the filter set; for code points in
 // the set, returns 1 or 0 according to norm2.hasBoundaryBefore(c).
 UBool
 FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
     if(set.contains(c)) {
         return norm2.hasBoundaryBefore(c)!=0;
     }
     return TRUE;
 }

 // Returns 1 for any code point outside the filter set; for code points in
 // the set, returns 1 or 0 according to norm2.hasBoundaryAfter(c).
 UBool
 FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
     if(set.contains(c)) {
         return norm2.hasBoundaryAfter(c)!=0;
     }
     return TRUE;
 }

 // Returns 1 for any code point outside the filter set; for code points in
 // the set, returns 1 or 0 according to norm2.isInert(c).
 UBool
 FilteredNormalizer2::isInert(UChar32 c) const {
     if(set.contains(c)) {
         return norm2.isInert(c)!=0;
     }
     return TRUE;
 }

 U_NAMESPACE_END

 // C API ------------------------------------------------------------------- ***

 U_NAMESPACE_USE

 // C API wrapper: creates a FilteredNormalizer2 from norm2 and filterSet and
 // returns it as an opaque UNormalizer2 pointer.
 // - Returns NULL without allocating if *pErrorCode already indicates failure.
 // - Returns NULL and sets U_ILLEGAL_ARGUMENT_ERROR if norm2 or filterSet is
 //   NULL.
 // - Sets U_MEMORY_ALLOCATION_ERROR if the allocation yields NULL.
 // The object is created with new. NOTE(review): presumably the caller
 // releases it with unorm2_close() -- confirm against the ICU API docs.
 U_CAPI UNormalizer2 * U_EXPORT2
 unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
     if(U_FAILURE(*pErrorCode)) {
         return NULL;
     }
     // norm2 is dereferenced below, so it needs the same NULL guard as filterSet.
     if(norm2==NULL || filterSet==NULL) {
         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
         return NULL;
     }
     Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
                                              *UnicodeSet::fromUSet(filterSet));
     if(fn2==NULL) {
         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
     }
     return (UNormalizer2 *)fn2;
 }

 #endif  // !UCONFIG_NO_NORMALIZATION
	/*
	*******************************************************************************
	*
	* Copyright (C) 2009-2012, International Business Machines
	* Corporation and others. All Rights Reserved.
	*
	*******************************************************************************
	* file name: filterednormalizer2.cpp
	* encoding: US-ASCII
	* tab size: 8 (not used)
	* indentation:4
	*
	* created on: 2009dec10
	* created by: Markus W. Scherer
	*/

	#include "unicode/utypes.h"

	#if !UCONFIG_NO_NORMALIZATION

	#include "starboard/client_porting/poem/string_poem.h"
	#include "unicode/normalizer2.h"
	#include "unicode/uniset.h"
	#include "unicode/unistr.h"
	#include "unicode/unorm.h"
	#include "cpputils.h"

	U_NAMESPACE_BEGIN

	// Out-of-line destructor; its body is intentionally empty.
	FilteredNormalizer2::~FilteredNormalizer2() {
	}

	// Public entry point: replaces the contents of dest with the filtered
	// normalization of src and returns dest.
	// - If errorCode indicates failure after uprv_checkCanGetBuffer() (defined
	//   elsewhere) has looked at src, dest is set to bogus.
	// - src and dest must be distinct objects; otherwise errorCode becomes
	//   U_ILLEGAL_ARGUMENT_ERROR and dest is returned without being modified.
	UnicodeString &
	FilteredNormalizer2::normalize(const UnicodeString &src,
	                               UnicodeString &dest,
	                               UErrorCode &errorCode) const {
	    uprv_checkCanGetBuffer(src, errorCode);
	    if(U_FAILURE(errorCode)) {
	        dest.setToBogus();
	    } else if(&src==&dest) {
	        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
	    } else {
	        // Empty dest first: the four-argument overload only appends.
	        dest.remove();
	        normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
	    }
	    return dest;
	}

	// Internal worker: performs no argument checking and appends to dest
	// (it never clears it). Returns dest.
	// spanCondition should be the condition that is likely to produce a
	// non-empty span at the start of src:
	// - USET_SPAN_SIMPLE for the start of a string (for set=[:age=3.2:], almost
	//   all common characters were already in Unicode 3.2);
	// - USET_SPAN_NOT_CONTAINED when continuing after an in-filter prefix.
	// The loop alternates between the two kinds of runs until src is consumed
	// or norm2.normalize() reports a failure.
	UnicodeString &
	FilteredNormalizer2::normalize(const UnicodeString &src,
	                               UnicodeString &dest,
	                               USetSpanCondition spanCondition,
	                               UErrorCode &errorCode) const {
	    // Declared outside the loop so the temporary destination's buffer is
	    // kept between iterations.
	    UnicodeString scratch;
	    int32_t start=0;
	    while(start<src.length()) {
	        const int32_t limit=set.span(src, start, spanCondition);
	        const int32_t runLength=limit-start;
	        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
	            // In-filter run: normalize it on its own and append the result.
	            if(runLength!=0) {
	                // norm2.normalizeSecondAndAppend() is deliberately not used:
	                // it could modify the non-filter part already in dest.
	                dest.append(norm2.normalize(src.tempSubStringBetween(start, limit),
	                                            scratch, errorCode));
	                if(U_FAILURE(errorCode)) {
	                    break;
	                }
	            }
	            spanCondition=USET_SPAN_NOT_CONTAINED;
	        } else {
	            // Out-of-filter run: copy it through unchanged.
	            if(runLength!=0) {
	                dest.append(src, start, runLength);
	            }
	            spanCondition=USET_SPAN_SIMPLE;
	        }
	        start=limit;
	    }
	    return dest;
	}

	// Public three-argument form: forwards to the four-argument overload with
	// doNormalize set to TRUE and returns its result.
	UnicodeString &
	FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
	                                              const UnicodeString &second,
	                                              UErrorCode &errorCode) const {
	    const UBool doNormalize=TRUE;
	    return normalizeSecondAndAppend(first, second, doNormalize, errorCode);
	}

	// Forwards to the four-argument normalizeSecondAndAppend() overload with
	// doNormalize set to FALSE and returns its result.
	UnicodeString &
	FilteredNormalizer2::append(UnicodeString &first,
	                            const UnicodeString &second,
	                            UErrorCode &errorCode) const {
	    const UBool doNormalize=FALSE;
	    return normalizeSecondAndAppend(first, second, doNormalize, errorCode);
	}

	// Shared implementation behind the public normalizeSecondAndAppend() and
	// append(). Appends second to first and returns first.
	// doNormalize selects, for the in-filter prefix of second, between
	// norm2.normalizeSecondAndAppend() (TRUE) and norm2.append() (FALSE), and
	// for the remainder of second between the filtered normalize() (TRUE) and a
	// plain UnicodeString append (FALSE).
	// first and second must be distinct objects; otherwise errorCode is set to
	// U_ILLEGAL_ARGUMENT_ERROR and first is returned unchanged.
	UnicodeString &
	FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
	const UnicodeString &second,
	UBool doNormalize,
	UErrorCode &errorCode) const {
	uprv_checkCanGetBuffer(first, errorCode);
	uprv_checkCanGetBuffer(second, errorCode);
	if(U_FAILURE(errorCode)) {
	return first;
	}
	if(&first==&second) {
	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
	return first;
	}
	// Nothing in first to merge with: the result comes from second alone.
	if(first.isEmpty()) {
	if(doNormalize) {
	return normalize(second, first, errorCode);
	} else {
	return first=second;
	}
	}
	// merge the in-filter suffix of the first string with the in-filter prefix of the second
	int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);
	if(prefixLimit!=0) {
	UnicodeString prefix(second.tempSubString(0, prefixLimit));
	int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
	if(suffixStart==0) {
	// The in-filter suffix starts at index 0, so norm2 can work on
	// first directly.
	if(doNormalize) {
	norm2.normalizeSecondAndAppend(first, prefix, errorCode);
	} else {
	norm2.append(first, prefix, errorCode);
	}
	} else {
	// Only the tail of first is in-filter: merge into a copy of that
	// tail and write it back, so the part of first before
	// suffixStart is not handed to norm2.
	UnicodeString middle(first, suffixStart, INT32_MAX);
	if(doNormalize) {
	norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
	} else {
	norm2.append(middle, prefix, errorCode);
	}
	first.replace(suffixStart, INT32_MAX, middle);
	}
	}
	// Whatever follows the in-filter prefix of second. Since we continue
	// after an in-filter prefix, the scan resumes with
	// USET_SPAN_NOT_CONTAINED.
	// NOTE(review): errorCode from the prefix merge above is not checked
	// before this step runs.
	if(prefixLimit<second.length()) {
	UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));
	if(doNormalize) {
	normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);
	} else {
	first.append(rest);
	}
	}
	return first;
	}

	// Returns 0 without consulting norm2 when c is not in the filter set;
	// otherwise returns 1 or 0 according to whether norm2.getDecomposition()
	// returned nonzero. decomposition is only ever passed on to norm2.
	UBool
	FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {
	    UBool found=set.contains(c);
	    if(found) {
	        found=norm2.getDecomposition(c, decomposition)!=0;
	    }
	    return found;
	}

	// Returns 0 without consulting norm2 when c is not in the filter set;
	// otherwise returns 1 or 0 according to whether norm2.getRawDecomposition()
	// returned nonzero. decomposition is only ever passed on to norm2.
	UBool
	FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
	    UBool found=set.contains(c);
	    if(found) {
	        found=norm2.getRawDecomposition(c, decomposition)!=0;
	    }
	    return found;
	}

	// Returns norm2.composePair(a, b) only when both a and b are in the filter
	// set; otherwise returns U_SENTINEL. b is not tested if a is already
	// outside the set.
	UChar32
	FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
	    if(!set.contains(a) || !set.contains(b)) {
	        return U_SENTINEL;
	    }
	    return norm2.composePair(a, b);
	}

	// Returns norm2's combining class for c when c is in the filter set, and 0
	// for any code point outside it.
	uint8_t
	FilteredNormalizer2::getCombiningClass(UChar32 c) const {
	    if(!set.contains(c)) {
	        return 0;
	    }
	    return norm2.getCombiningClass(c);
	}

	// Returns TRUE when every in-filter run of s passes norm2.isNormalized().
	// Out-of-filter runs are skipped without being examined.
	// Returns FALSE if errorCode indicates failure on entry or after
	// uprv_checkCanGetBuffer(), if any in-filter run is reported as not
	// normalized, or if norm2 sets a failure code.
	UBool
	FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
	    uprv_checkCanGetBuffer(s, errorCode);
	    if(U_FAILURE(errorCode)) {
	        return FALSE;
	    }
	    // The scan alternates between in-filter and out-of-filter runs,
	    // beginning with an in-filter run.
	    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
	    for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
	        int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
	        if(spanCondition==USET_SPAN_NOT_CONTAINED) {
	            // Out-of-filter run: nothing to check.
	            spanCondition=USET_SPAN_SIMPLE;
	        } else {
	            if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) ||
	                U_FAILURE(errorCode)
	            ) {
	                return FALSE;
	            }
	            spanCondition=USET_SPAN_NOT_CONTAINED;
	        }
	        prevSpanLimit=spanLimit;
	    }
	    return TRUE;
	}

	// Combines norm2.quickCheck() over the in-filter runs of s; out-of-filter
	// runs are skipped.
	// - Returns UNORM_MAYBE if errorCode indicates failure on entry or after
	//   uprv_checkCanGetBuffer().
	// - Returns a run's result immediately if that run yields UNORM_NO or norm2
	//   sets a failure code.
	// - Otherwise returns UNORM_MAYBE if any run yielded UNORM_MAYBE, else
	//   UNORM_YES.
	UNormalizationCheckResult
	FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
	    uprv_checkCanGetBuffer(s, errorCode);
	    if(U_FAILURE(errorCode)) {
	        return UNORM_MAYBE;
	    }
	    UNormalizationCheckResult result=UNORM_YES;
	    // The scan alternates between in-filter and out-of-filter runs,
	    // beginning with an in-filter run.
	    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
	    for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
	        int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
	        if(spanCondition==USET_SPAN_NOT_CONTAINED) {
	            // Out-of-filter run: nothing to check.
	            spanCondition=USET_SPAN_SIMPLE;
	        } else {
	            UNormalizationCheckResult qcResult=
	                norm2.quickCheck(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
	            if(U_FAILURE(errorCode) || qcResult==UNORM_NO) {
	                return qcResult;
	            } else if(qcResult==UNORM_MAYBE) {
	                result=qcResult;
	            }
	            spanCondition=USET_SPAN_NOT_CONTAINED;
	        }
	        prevSpanLimit=spanLimit;
	    }
	    return result;
	}

	// Returns an index into s: the scan walks the runs of s and stops at the
	// first in-filter run for which norm2.spanQuickCheckYes() covers less than
	// the whole run (or for which norm2 sets a failure code), returning the run
	// start plus the length norm2 reported. Out-of-filter runs are skipped.
	// Returns 0 if errorCode indicates failure on entry or after
	// uprv_checkCanGetBuffer(), and s.length() if no run stops the scan.
	int32_t
	FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
	    uprv_checkCanGetBuffer(s, errorCode);
	    if(U_FAILURE(errorCode)) {
	        return 0;
	    }
	    // The scan alternates between in-filter and out-of-filter runs,
	    // beginning with an in-filter run.
	    USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
	    for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
	        int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
	        if(spanCondition==USET_SPAN_NOT_CONTAINED) {
	            // Out-of-filter run: nothing to check.
	            spanCondition=USET_SPAN_SIMPLE;
	        } else {
	            // norm2 reports a length relative to the run; add the run start
	            // to turn it into an index into s.
	            int32_t yesLimit=
	                prevSpanLimit+
	                norm2.spanQuickCheckYes(
	                    s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
	            if(U_FAILURE(errorCode) || yesLimit<spanLimit) {
	                return yesLimit;
	            }
	            spanCondition=USET_SPAN_NOT_CONTAINED;
	        }
	        prevSpanLimit=spanLimit;
	    }
	    return s.length();
	}

	// Returns true for any code point outside the filter set; for code points
	// in the set, defers to norm2.hasBoundaryBefore(c).
	UBool
	FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
	    return !set.contains(c) || norm2.hasBoundaryBefore(c);
	}

	// Returns true for any code point outside the filter set; for code points
	// in the set, defers to norm2.hasBoundaryAfter(c).
	UBool
	FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
	    return !set.contains(c) || norm2.hasBoundaryAfter(c);
	}

	// Returns true for any code point outside the filter set; for code points
	// in the set, defers to norm2.isInert(c).
	UBool
	FilteredNormalizer2::isInert(UChar32 c) const {
	    return !set.contains(c) || norm2.isInert(c);
	}

	U_NAMESPACE_END

	// C API ------------------------------------------------------------------- ***

	U_NAMESPACE_USE

	// C API wrapper: creates a FilteredNormalizer2 from norm2 and filterSet and
	// returns it as an opaque UNormalizer2 pointer.
	// - Returns NULL without allocating if *pErrorCode already indicates failure.
	// - Returns NULL and sets U_ILLEGAL_ARGUMENT_ERROR if norm2 or filterSet is
	//   NULL.
	// - Sets U_MEMORY_ALLOCATION_ERROR if the allocation yields NULL.
	// The object is created with new. NOTE(review): presumably the caller
	// releases it with unorm2_close() -- confirm against the ICU API docs.
	U_CAPI UNormalizer2 * U_EXPORT2
	unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
	    if(U_FAILURE(*pErrorCode)) {
	        return NULL;
	    }
	    // norm2 is dereferenced below, so it needs the same NULL guard as filterSet.
	    if(norm2==NULL || filterSet==NULL) {
	        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
	        return NULL;
	    }
	    Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
	                                             *UnicodeSet::fromUSet(filterSet));
	    if(fn2==NULL) {
	        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
	    }
	    return (UNormalizer2 *)fn2;
	}

	#endif // !UCONFIG_NO_NORMALIZATION