| /* |
| ******************************************************************************* |
| * |
| * Copyright (C) 2009-2012, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| ******************************************************************************* |
| * file name: normalizer2.cpp |
| * encoding: US-ASCII |
| * tab size: 8 (not used) |
| * indentation:4 |
| * |
| * created on: 2009nov22 |
| * created by: Markus W. Scherer |
| */ |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_NORMALIZATION |
| |
| #include "unicode/localpointer.h" |
| #include "unicode/normalizer2.h" |
| #include "unicode/unistr.h" |
| #include "unicode/unorm.h" |
| #include "cpputils.h" |
| #include "cstring.h" |
| #include "mutex.h" |
| #include "normalizer2impl.h" |
| #include "ucln_cmn.h" |
| #include "uhash.h" |
| |
| U_NAMESPACE_BEGIN |
| |
| // Public API dispatch via Normalizer2 subclasses -------------------------- *** |
| |
| Normalizer2::~Normalizer2() {} |
| |
| UBool |
| Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const { |
| return FALSE; |
| } |
| |
| UChar32 |
| Normalizer2::composePair(UChar32, UChar32) const { |
| return U_SENTINEL; |
| } |
| |
| uint8_t |
| Normalizer2::getCombiningClass(UChar32 /*c*/) const { |
| return 0; |
| } |
| |
| UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2) |
| |
| // Normalizer2 implementation for the old UNORM_NONE. |
| class NoopNormalizer2 : public Normalizer2 { |
| virtual ~NoopNormalizer2(); |
| |
| virtual UnicodeString & |
| normalize(const UnicodeString &src, |
| UnicodeString &dest, |
| UErrorCode &errorCode) const { |
| if(U_SUCCESS(errorCode)) { |
| if(&dest!=&src) { |
| dest=src; |
| } else { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| } |
| return dest; |
| } |
| virtual UnicodeString & |
| normalizeSecondAndAppend(UnicodeString &first, |
| const UnicodeString &second, |
| UErrorCode &errorCode) const { |
| if(U_SUCCESS(errorCode)) { |
| if(&first!=&second) { |
| first.append(second); |
| } else { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| } |
| return first; |
| } |
| virtual UnicodeString & |
| append(UnicodeString &first, |
| const UnicodeString &second, |
| UErrorCode &errorCode) const { |
| if(U_SUCCESS(errorCode)) { |
| if(&first!=&second) { |
| first.append(second); |
| } else { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| } |
| return first; |
| } |
| virtual UBool |
| getDecomposition(UChar32, UnicodeString &) const { |
| return FALSE; |
| } |
| // No need to override the default getRawDecomposition(). |
| virtual UBool |
| isNormalized(const UnicodeString &, UErrorCode &) const { |
| return TRUE; |
| } |
| virtual UNormalizationCheckResult |
| quickCheck(const UnicodeString &, UErrorCode &) const { |
| return UNORM_YES; |
| } |
| virtual int32_t |
| spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { |
| return s.length(); |
| } |
| virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } |
| virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } |
| virtual UBool isInert(UChar32) const { return TRUE; } |
| }; |
| |
| NoopNormalizer2::~NoopNormalizer2() {} |
| |
| // Intermediate class: |
| // Has Normalizer2Impl and does boilerplate argument checking and setup. |
| class Normalizer2WithImpl : public Normalizer2 { |
| public: |
| Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} |
| virtual ~Normalizer2WithImpl(); |
| |
| // normalize |
| virtual UnicodeString & |
| normalize(const UnicodeString &src, |
| UnicodeString &dest, |
| UErrorCode &errorCode) const { |
| if(U_FAILURE(errorCode)) { |
| dest.setToBogus(); |
| return dest; |
| } |
| const UChar *sArray=src.getBuffer(); |
| if(&dest==&src || sArray==NULL) { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| dest.setToBogus(); |
| return dest; |
| } |
| dest.remove(); |
| ReorderingBuffer buffer(impl, dest); |
| if(buffer.init(src.length(), errorCode)) { |
| normalize(sArray, sArray+src.length(), buffer, errorCode); |
| } |
| return dest; |
| } |
| virtual void |
| normalize(const UChar *src, const UChar *limit, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; |
| |
| // normalize and append |
| virtual UnicodeString & |
| normalizeSecondAndAppend(UnicodeString &first, |
| const UnicodeString &second, |
| UErrorCode &errorCode) const { |
| return normalizeSecondAndAppend(first, second, TRUE, errorCode); |
| } |
| virtual UnicodeString & |
| append(UnicodeString &first, |
| const UnicodeString &second, |
| UErrorCode &errorCode) const { |
| return normalizeSecondAndAppend(first, second, FALSE, errorCode); |
| } |
| UnicodeString & |
| normalizeSecondAndAppend(UnicodeString &first, |
| const UnicodeString &second, |
| UBool doNormalize, |
| UErrorCode &errorCode) const { |
| uprv_checkCanGetBuffer(first, errorCode); |
| if(U_FAILURE(errorCode)) { |
| return first; |
| } |
| const UChar *secondArray=second.getBuffer(); |
| if(&first==&second || secondArray==NULL) { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return first; |
| } |
| int32_t firstLength=first.length(); |
| UnicodeString safeMiddle; |
| { |
| ReorderingBuffer buffer(impl, first); |
| if(buffer.init(firstLength+second.length(), errorCode)) { |
| normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, |
| safeMiddle, buffer, errorCode); |
| } |
| } // The ReorderingBuffer destructor finalizes the first string. |
| if(U_FAILURE(errorCode)) { |
| // Restore the modified suffix of the first string. |
| first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); |
| } |
| return first; |
| } |
| virtual void |
| normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| UnicodeString &safeMiddle, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; |
| virtual UBool |
| getDecomposition(UChar32 c, UnicodeString &decomposition) const { |
| UChar buffer[4]; |
| int32_t length; |
| const UChar *d=impl.getDecomposition(c, buffer, length); |
| if(d==NULL) { |
| return FALSE; |
| } |
| if(d==buffer) { |
| decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) |
| } else { |
| decomposition.setTo(FALSE, d, length); // read-only alias |
| } |
| return TRUE; |
| } |
| virtual UBool |
| getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { |
| UChar buffer[30]; |
| int32_t length; |
| const UChar *d=impl.getRawDecomposition(c, buffer, length); |
| if(d==NULL) { |
| return FALSE; |
| } |
| if(d==buffer) { |
| decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) |
| } else { |
| decomposition.setTo(FALSE, d, length); // read-only alias |
| } |
| return TRUE; |
| } |
| virtual UChar32 |
| composePair(UChar32 a, UChar32 b) const { |
| return impl.composePair(a, b); |
| } |
| |
| virtual uint8_t |
| getCombiningClass(UChar32 c) const { |
| return impl.getCC(impl.getNorm16(c)); |
| } |
| |
| // quick checks |
| virtual UBool |
| isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { |
| if(U_FAILURE(errorCode)) { |
| return FALSE; |
| } |
| const UChar *sArray=s.getBuffer(); |
| if(sArray==NULL) { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return FALSE; |
| } |
| const UChar *sLimit=sArray+s.length(); |
| return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); |
| } |
| virtual UNormalizationCheckResult |
| quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { |
| return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; |
| } |
| virtual int32_t |
| spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { |
| if(U_FAILURE(errorCode)) { |
| return 0; |
| } |
| const UChar *sArray=s.getBuffer(); |
| if(sArray==NULL) { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return 0; |
| } |
| return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); |
| } |
| virtual const UChar * |
| spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; |
| |
| virtual UNormalizationCheckResult getQuickCheck(UChar32) const { |
| return UNORM_YES; |
| } |
| |
| const Normalizer2Impl &impl; |
| }; |
| |
| Normalizer2WithImpl::~Normalizer2WithImpl() {} |
| |
| class DecomposeNormalizer2 : public Normalizer2WithImpl { |
| public: |
| DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} |
| virtual ~DecomposeNormalizer2(); |
| |
| private: |
| virtual void |
| normalize(const UChar *src, const UChar *limit, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| impl.decompose(src, limit, &buffer, errorCode); |
| } |
| using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. |
| virtual void |
| normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| UnicodeString &safeMiddle, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); |
| } |
| virtual const UChar * |
| spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { |
| return impl.decompose(src, limit, NULL, errorCode); |
| } |
| using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. |
| virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { |
| return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; |
| } |
| virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } |
| virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } |
| virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } |
| }; |
| |
| DecomposeNormalizer2::~DecomposeNormalizer2() {} |
| |
| class ComposeNormalizer2 : public Normalizer2WithImpl { |
| public: |
| ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : |
| Normalizer2WithImpl(ni), onlyContiguous(fcc) {} |
| virtual ~ComposeNormalizer2(); |
| |
| private: |
| virtual void |
| normalize(const UChar *src, const UChar *limit, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); |
| } |
| using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. |
| virtual void |
| normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| UnicodeString &safeMiddle, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); |
| } |
| |
| virtual UBool |
| isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { |
| if(U_FAILURE(errorCode)) { |
| return FALSE; |
| } |
| const UChar *sArray=s.getBuffer(); |
| if(sArray==NULL) { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return FALSE; |
| } |
| UnicodeString temp; |
| ReorderingBuffer buffer(impl, temp); |
| if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization |
| return FALSE; |
| } |
| return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); |
| } |
| virtual UNormalizationCheckResult |
| quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { |
| if(U_FAILURE(errorCode)) { |
| return UNORM_MAYBE; |
| } |
| const UChar *sArray=s.getBuffer(); |
| if(sArray==NULL) { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return UNORM_MAYBE; |
| } |
| UNormalizationCheckResult qcResult=UNORM_YES; |
| impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); |
| return qcResult; |
| } |
| virtual const UChar * |
| spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { |
| return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); |
| } |
| using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. |
| virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { |
| return impl.getCompQuickCheck(impl.getNorm16(c)); |
| } |
| virtual UBool hasBoundaryBefore(UChar32 c) const { |
| return impl.hasCompBoundaryBefore(c); |
| } |
| virtual UBool hasBoundaryAfter(UChar32 c) const { |
| return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); |
| } |
| virtual UBool isInert(UChar32 c) const { |
| return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); |
| } |
| |
| const UBool onlyContiguous; |
| }; |
| |
| ComposeNormalizer2::~ComposeNormalizer2() {} |
| |
| class FCDNormalizer2 : public Normalizer2WithImpl { |
| public: |
| FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} |
| virtual ~FCDNormalizer2(); |
| |
| private: |
| virtual void |
| normalize(const UChar *src, const UChar *limit, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| impl.makeFCD(src, limit, &buffer, errorCode); |
| } |
| using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. |
| virtual void |
| normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| UnicodeString &safeMiddle, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); |
| } |
| virtual const UChar * |
| spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { |
| return impl.makeFCD(src, limit, NULL, errorCode); |
| } |
| using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. |
| virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } |
| virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } |
| virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } |
| }; |
| |
| FCDNormalizer2::~FCDNormalizer2() {} |
| |
| // instance cache ---------------------------------------------------------- *** |
| |
| struct Norm2AllModes : public UMemory { |
| static Norm2AllModes *createInstance(const char *packageName, |
| const char *name, |
| UErrorCode &errorCode); |
| Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {} |
| |
| Normalizer2Impl impl; |
| ComposeNormalizer2 comp; |
| DecomposeNormalizer2 decomp; |
| FCDNormalizer2 fcd; |
| ComposeNormalizer2 fcc; |
| }; |
| |
| Norm2AllModes * |
| Norm2AllModes::createInstance(const char *packageName, |
| const char *name, |
| UErrorCode &errorCode) { |
| if(U_FAILURE(errorCode)) { |
| return NULL; |
| } |
| LocalPointer<Norm2AllModes> allModes(new Norm2AllModes); |
| if(allModes.isNull()) { |
| errorCode=U_MEMORY_ALLOCATION_ERROR; |
| return NULL; |
| } |
| allModes->impl.load(packageName, name, errorCode); |
| return U_SUCCESS(errorCode) ? allModes.orphan() : NULL; |
| } |
| |
| U_CDECL_BEGIN |
| static UBool U_CALLCONV uprv_normalizer2_cleanup(); |
| U_CDECL_END |
| |
| class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> { |
| public: |
| Norm2AllModesSingleton(TriStateSingleton &s, const char *n) : |
| TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {} |
| Norm2AllModes *getInstance(UErrorCode &errorCode) { |
| return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode); |
| } |
| private: |
| static void *createInstance(const void *context, UErrorCode &errorCode) { |
| ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); |
| return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode); |
| } |
| |
| const char *name; |
| }; |
| |
| STATIC_TRI_STATE_SINGLETON(nfcSingleton); |
| STATIC_TRI_STATE_SINGLETON(nfkcSingleton); |
| STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton); |
| |
| class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> { |
| public: |
| Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {} |
| Normalizer2 *getInstance(UErrorCode &errorCode) { |
| return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode); |
| } |
| private: |
| static void *createInstance(const void *, UErrorCode &errorCode) { |
| Normalizer2 *noop=new NoopNormalizer2; |
| if(noop==NULL) { |
| errorCode=U_MEMORY_ALLOCATION_ERROR; |
| } |
| ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); |
| return noop; |
| } |
| }; |
| |
| STATIC_SIMPLE_SINGLETON(noopSingleton); |
| |
| static UHashtable *cache=NULL; |
| |
| U_CDECL_BEGIN |
| |
| static void U_CALLCONV deleteNorm2AllModes(void *allModes) { |
| delete (Norm2AllModes *)allModes; |
| } |
| |
| static UBool U_CALLCONV uprv_normalizer2_cleanup() { |
| Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance(); |
| Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance(); |
| Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance(); |
| Norm2Singleton(noopSingleton).deleteInstance(); |
| uhash_close(cache); |
| cache=NULL; |
| return TRUE; |
| } |
| |
| U_CDECL_END |
| |
| const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) { |
| Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); |
| return allModes!=NULL ? &allModes->comp : NULL; |
| } |
| |
| const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { |
| Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); |
| return allModes!=NULL ? &allModes->decomp : NULL; |
| } |
| |
| const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { |
| Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); |
| return allModes!=NULL ? &allModes->fcd : NULL; |
| } |
| |
| const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { |
| Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); |
| return allModes!=NULL ? &allModes->fcc : NULL; |
| } |
| |
| const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) { |
| Norm2AllModes *allModes= |
| Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); |
| return allModes!=NULL ? &allModes->comp : NULL; |
| } |
| |
| const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) { |
| Norm2AllModes *allModes= |
| Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); |
| return allModes!=NULL ? &allModes->decomp : NULL; |
| } |
| |
| const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) { |
| Norm2AllModes *allModes= |
| Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); |
| return allModes!=NULL ? &allModes->comp : NULL; |
| } |
| |
| const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { |
| return Norm2Singleton(noopSingleton).getInstance(errorCode); |
| } |
| |
| const Normalizer2 * |
| Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { |
| if(U_FAILURE(errorCode)) { |
| return NULL; |
| } |
| switch(mode) { |
| case UNORM_NFD: |
| return getNFDInstance(errorCode); |
| case UNORM_NFKD: |
| return getNFKDInstance(errorCode); |
| case UNORM_NFC: |
| return getNFCInstance(errorCode); |
| case UNORM_NFKC: |
| return getNFKCInstance(errorCode); |
| case UNORM_FCD: |
| return getFCDInstance(errorCode); |
| default: // UNORM_NONE |
| return getNoopInstance(errorCode); |
| } |
| } |
| |
| const Normalizer2Impl * |
| Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { |
| Norm2AllModes *allModes= |
| Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); |
| return allModes!=NULL ? &allModes->impl : NULL; |
| } |
| |
| const Normalizer2Impl * |
| Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { |
| Norm2AllModes *allModes= |
| Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); |
| return allModes!=NULL ? &allModes->impl : NULL; |
| } |
| |
| const Normalizer2Impl * |
| Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { |
| Norm2AllModes *allModes= |
| Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); |
| return allModes!=NULL ? &allModes->impl : NULL; |
| } |
| |
| const Normalizer2Impl * |
| Normalizer2Factory::getImpl(const Normalizer2 *norm2) { |
| return &((Normalizer2WithImpl *)norm2)->impl; |
| } |
| |
| const Normalizer2 * |
| Normalizer2::getNFCInstance(UErrorCode &errorCode) { |
| return Normalizer2Factory::getNFCInstance(errorCode); |
| } |
| |
| const Normalizer2 * |
| Normalizer2::getNFDInstance(UErrorCode &errorCode) { |
| return Normalizer2Factory::getNFDInstance(errorCode); |
| } |
| |
| const Normalizer2 * |
| Normalizer2::getNFKCInstance(UErrorCode &errorCode) { |
| return Normalizer2Factory::getNFKCInstance(errorCode); |
| } |
| |
| const Normalizer2 * |
| Normalizer2::getNFKDInstance(UErrorCode &errorCode) { |
| return Normalizer2Factory::getNFKDInstance(errorCode); |
| } |
| |
| const Normalizer2 * |
| Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { |
| return Normalizer2Factory::getNFKC_CFInstance(errorCode); |
| } |
| |
| const Normalizer2 * |
| Normalizer2::getInstance(const char *packageName, |
| const char *name, |
| UNormalization2Mode mode, |
| UErrorCode &errorCode) { |
| if(U_FAILURE(errorCode)) { |
| return NULL; |
| } |
| if(name==NULL || *name==0) { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return NULL; |
| } |
| Norm2AllModes *allModes=NULL; |
| if(packageName==NULL) { |
| if(0==uprv_strcmp(name, "nfc")) { |
| allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); |
| } else if(0==uprv_strcmp(name, "nfkc")) { |
| allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); |
| } else if(0==uprv_strcmp(name, "nfkc_cf")) { |
| allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); |
| } |
| } |
| if(allModes==NULL && U_SUCCESS(errorCode)) { |
| { |
| Mutex lock; |
| if(cache!=NULL) { |
| allModes=(Norm2AllModes *)uhash_get(cache, name); |
| } |
| } |
| if(allModes==NULL) { |
| LocalPointer<Norm2AllModes> localAllModes( |
| Norm2AllModes::createInstance(packageName, name, errorCode)); |
| if(U_SUCCESS(errorCode)) { |
| Mutex lock; |
| if(cache==NULL) { |
| cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); |
| if(U_FAILURE(errorCode)) { |
| return NULL; |
| } |
| uhash_setKeyDeleter(cache, uprv_free); |
| uhash_setValueDeleter(cache, deleteNorm2AllModes); |
| } |
| void *temp=uhash_get(cache, name); |
| if(temp==NULL) { |
| int32_t keyLength=uprv_strlen(name)+1; |
| char *nameCopy=(char *)uprv_malloc(keyLength); |
| if(nameCopy==NULL) { |
| errorCode=U_MEMORY_ALLOCATION_ERROR; |
| return NULL; |
| } |
| uprv_memcpy(nameCopy, name, keyLength); |
| uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode); |
| } else { |
| // race condition |
| allModes=(Norm2AllModes *)temp; |
| } |
| } |
| } |
| } |
| if(allModes!=NULL && U_SUCCESS(errorCode)) { |
| switch(mode) { |
| case UNORM2_COMPOSE: |
| return &allModes->comp; |
| case UNORM2_DECOMPOSE: |
| return &allModes->decomp; |
| case UNORM2_FCD: |
| return &allModes->fcd; |
| case UNORM2_COMPOSE_CONTIGUOUS: |
| return &allModes->fcc; |
| default: |
| break; // do nothing |
| } |
| } |
| return NULL; |
| } |
| |
| U_NAMESPACE_END |
| |
| // C API ------------------------------------------------------------------- *** |
| |
| U_NAMESPACE_USE |
| |
| U_CAPI const UNormalizer2 * U_EXPORT2 |
| unorm2_getNFCInstance(UErrorCode *pErrorCode) { |
| return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode); |
| } |
| |
| U_CAPI const UNormalizer2 * U_EXPORT2 |
| unorm2_getNFDInstance(UErrorCode *pErrorCode) { |
| return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode); |
| } |
| |
| U_CAPI const UNormalizer2 * U_EXPORT2 |
| unorm2_getNFKCInstance(UErrorCode *pErrorCode) { |
| return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); |
| } |
| |
| U_CAPI const UNormalizer2 * U_EXPORT2 |
| unorm2_getNFKDInstance(UErrorCode *pErrorCode) { |
| return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); |
| } |
| |
| U_CAPI const UNormalizer2 * U_EXPORT2 |
| unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { |
| return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); |
| } |
| |
| U_CAPI const UNormalizer2 * U_EXPORT2 |
| unorm2_getInstance(const char *packageName, |
| const char *name, |
| UNormalization2Mode mode, |
| UErrorCode *pErrorCode) { |
| return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); |
| } |
| |
| U_CAPI void U_EXPORT2 |
| unorm2_close(UNormalizer2 *norm2) { |
| delete (Normalizer2 *)norm2; |
| } |
| |
| U_CAPI int32_t U_EXPORT2 |
| unorm2_normalize(const UNormalizer2 *norm2, |
| const UChar *src, int32_t length, |
| UChar *dest, int32_t capacity, |
| UErrorCode *pErrorCode) { |
| if(U_FAILURE(*pErrorCode)) { |
| return 0; |
| } |
| if( (src==NULL ? length!=0 : length<-1) || |
| (dest==NULL ? capacity!=0 : capacity<0) || |
| (src==dest && src!=NULL) |
| ) { |
| *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return 0; |
| } |
| UnicodeString destString(dest, 0, capacity); |
| // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash. |
| if(length!=0) { |
| const Normalizer2 *n2=(const Normalizer2 *)norm2; |
| const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); |
| if(n2wi!=NULL) { |
| // Avoid duplicate argument checking and support NUL-terminated src. |
| ReorderingBuffer buffer(n2wi->impl, destString); |
| if(buffer.init(length, *pErrorCode)) { |
| n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode); |
| } |
| } else { |
| UnicodeString srcString(length<0, src, length); |
| n2->normalize(srcString, destString, *pErrorCode); |
| } |
| } |
| return destString.extract(dest, capacity, *pErrorCode); |
| } |
| |
| static int32_t |
| normalizeSecondAndAppend(const UNormalizer2 *norm2, |
| UChar *first, int32_t firstLength, int32_t firstCapacity, |
| const UChar *second, int32_t secondLength, |
| UBool doNormalize, |
| UErrorCode *pErrorCode) { |
| if(U_FAILURE(*pErrorCode)) { |
| return 0; |
| } |
| if( (second==NULL ? secondLength!=0 : secondLength<-1) || |
| (first==NULL ? (firstCapacity!=0 || firstLength!=0) : |
| (firstCapacity<0 || firstLength<-1)) || |
| (first==second && first!=NULL) |
| ) { |
| *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return 0; |
| } |
| UnicodeString firstString(first, firstLength, firstCapacity); |
| firstLength=firstString.length(); // In case it was -1. |
| // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash. |
| if(secondLength!=0) { |
| const Normalizer2 *n2=(const Normalizer2 *)norm2; |
| const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); |
| if(n2wi!=NULL) { |
| // Avoid duplicate argument checking and support NUL-terminated src. |
| UnicodeString safeMiddle; |
| { |
| ReorderingBuffer buffer(n2wi->impl, firstString); |
| if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 |
| n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL, |
| doNormalize, safeMiddle, buffer, *pErrorCode); |
| } |
| } // The ReorderingBuffer destructor finalizes firstString. |
| if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) { |
| // Restore the modified suffix of the first string. |
| // This does not restore first[] array contents between firstLength and firstCapacity. |
| // (That might be uninitialized memory, as far as we know.) |
| if(first!=NULL) { /* don't dereference NULL */ |
| safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length()); |
| if(firstLength<firstCapacity) { |
| first[firstLength]=0; // NUL-terminate in case it was originally. |
| } |
| } |
| } |
| } else { |
| UnicodeString secondString(secondLength<0, second, secondLength); |
| if(doNormalize) { |
| n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode); |
| } else { |
| n2->append(firstString, secondString, *pErrorCode); |
| } |
| } |
| } |
| return firstString.extract(first, firstCapacity, *pErrorCode); |
| } |
| |
| U_CAPI int32_t U_EXPORT2 |
| unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, |
| UChar *first, int32_t firstLength, int32_t firstCapacity, |
| const UChar *second, int32_t secondLength, |
| UErrorCode *pErrorCode) { |
| return normalizeSecondAndAppend(norm2, |
| first, firstLength, firstCapacity, |
| second, secondLength, |
| TRUE, pErrorCode); |
| } |
| |
| U_CAPI int32_t U_EXPORT2 |
| unorm2_append(const UNormalizer2 *norm2, |
| UChar *first, int32_t firstLength, int32_t firstCapacity, |
| const UChar *second, int32_t secondLength, |
| UErrorCode *pErrorCode) { |
| return normalizeSecondAndAppend(norm2, |
| first, firstLength, firstCapacity, |
| second, secondLength, |
| FALSE, pErrorCode); |
| } |
| |
| U_CAPI int32_t U_EXPORT2 |
| unorm2_getDecomposition(const UNormalizer2 *norm2, |
| UChar32 c, UChar *decomposition, int32_t capacity, |
| UErrorCode *pErrorCode) { |
| if(U_FAILURE(*pErrorCode)) { |
| return 0; |
| } |
| if(decomposition==NULL ? capacity!=0 : capacity<0) { |
| *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return 0; |
| } |
| UnicodeString destString(decomposition, 0, capacity); |
| if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) { |
| return destString.extract(decomposition, capacity, *pErrorCode); |
| } else { |
| return -1; |
| } |
| } |
| |
| U_CAPI int32_t U_EXPORT2 |
| unorm2_getRawDecomposition(const UNormalizer2 *norm2, |
| UChar32 c, UChar *decomposition, int32_t capacity, |
| UErrorCode *pErrorCode) { |
| if(U_FAILURE(*pErrorCode)) { |
| return 0; |
| } |
| if(decomposition==NULL ? capacity!=0 : capacity<0) { |
| *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return 0; |
| } |
| UnicodeString destString(decomposition, 0, capacity); |
| if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) { |
| return destString.extract(decomposition, capacity, *pErrorCode); |
| } else { |
| return -1; |
| } |
| } |
| |
| U_CAPI UChar32 U_EXPORT2 |
| unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) { |
| return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b); |
| } |
| |
| U_CAPI uint8_t U_EXPORT2 |
| unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) { |
| return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c); |
| } |
| |
| U_CAPI UBool U_EXPORT2 |
| unorm2_isNormalized(const UNormalizer2 *norm2, |
| const UChar *s, int32_t length, |
| UErrorCode *pErrorCode) { |
| if(U_FAILURE(*pErrorCode)) { |
| return 0; |
| } |
| if((s==NULL && length!=0) || length<-1) { |
| *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return 0; |
| } |
| UnicodeString sString(length<0, s, length); |
| return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); |
| } |
| |
| U_CAPI UNormalizationCheckResult U_EXPORT2 |
| unorm2_quickCheck(const UNormalizer2 *norm2, |
| const UChar *s, int32_t length, |
| UErrorCode *pErrorCode) { |
| if(U_FAILURE(*pErrorCode)) { |
| return UNORM_NO; |
| } |
| if((s==NULL && length!=0) || length<-1) { |
| *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return UNORM_NO; |
| } |
| UnicodeString sString(length<0, s, length); |
| return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); |
| } |
| |
| U_CAPI int32_t U_EXPORT2 |
| unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, |
| const UChar *s, int32_t length, |
| UErrorCode *pErrorCode) { |
| if(U_FAILURE(*pErrorCode)) { |
| return 0; |
| } |
| if((s==NULL && length!=0) || length<-1) { |
| *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return 0; |
| } |
| UnicodeString sString(length<0, s, length); |
| return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); |
| } |
| |
| U_CAPI UBool U_EXPORT2 |
| unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { |
| return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); |
| } |
| |
| U_CAPI UBool U_EXPORT2 |
| unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { |
| return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); |
| } |
| |
| U_CAPI UBool U_EXPORT2 |
| unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { |
| return ((const Normalizer2 *)norm2)->isInert(c); |
| } |
| |
| // Some properties APIs ---------------------------------------------------- *** |
| |
| U_CAPI uint8_t U_EXPORT2 |
| u_getCombiningClass(UChar32 c) { |
| UErrorCode errorCode=U_ZERO_ERROR; |
| const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode); |
| if(U_SUCCESS(errorCode)) { |
| return nfd->getCombiningClass(c); |
| } else { |
| return 0; |
| } |
| } |
| |
| U_CFUNC UNormalizationCheckResult |
| unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { |
| if(mode<=UNORM_NONE || UNORM_FCD<=mode) { |
| return UNORM_YES; |
| } |
| UErrorCode errorCode=U_ZERO_ERROR; |
| const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); |
| if(U_SUCCESS(errorCode)) { |
| return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); |
| } else { |
| return UNORM_MAYBE; |
| } |
| } |
| |
| U_CFUNC uint16_t |
| unorm_getFCD16(UChar32 c) { |
| UErrorCode errorCode=U_ZERO_ERROR; |
| const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); |
| if(U_SUCCESS(errorCode)) { |
| return impl->getFCD16(c); |
| } else { |
| return 0; |
| } |
| } |
| |
| #endif // !UCONFIG_NO_NORMALIZATION |