| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /* |
| ******************************************************************************* |
| * Copyright (C) 2014, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ******************************************************************************* |
| * norm2allmodes.h |
| * |
| * created on: 2014sep07 |
| * created by: Markus W. Scherer |
| */ |
| |
| #ifndef __NORM2ALLMODES_H__ |
| #define __NORM2ALLMODES_H__ |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_NORMALIZATION |
| |
| #include "unicode/edits.h" |
| #include "unicode/normalizer2.h" |
| #include "unicode/stringoptions.h" |
| #include "unicode/unistr.h" |
| #include "cpputils.h" |
| #include "normalizer2impl.h" |
| |
| U_NAMESPACE_BEGIN |
| |
| // Intermediate class: |
| // Has Normalizer2Impl and does boilerplate argument checking and setup. |
| class Normalizer2WithImpl : public Normalizer2 { |
| public: |
| Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} |
| virtual ~Normalizer2WithImpl(); |
| |
| // normalize |
| virtual UnicodeString & |
| normalize(const UnicodeString &src, |
| UnicodeString &dest, |
| UErrorCode &errorCode) const { |
| if(U_FAILURE(errorCode)) { |
| dest.setToBogus(); |
| return dest; |
| } |
| const UChar *sArray=src.getBuffer(); |
| if(&dest==&src || sArray==NULL) { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| dest.setToBogus(); |
| return dest; |
| } |
| dest.remove(); |
| ReorderingBuffer buffer(impl, dest); |
| if(buffer.init(src.length(), errorCode)) { |
| normalize(sArray, sArray+src.length(), buffer, errorCode); |
| } |
| return dest; |
| } |
| virtual void |
| normalize(const UChar *src, const UChar *limit, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; |
| |
| // normalize and append |
| virtual UnicodeString & |
| normalizeSecondAndAppend(UnicodeString &first, |
| const UnicodeString &second, |
| UErrorCode &errorCode) const { |
| return normalizeSecondAndAppend(first, second, true, errorCode); |
| } |
| virtual UnicodeString & |
| append(UnicodeString &first, |
| const UnicodeString &second, |
| UErrorCode &errorCode) const { |
| return normalizeSecondAndAppend(first, second, false, errorCode); |
| } |
| UnicodeString & |
| normalizeSecondAndAppend(UnicodeString &first, |
| const UnicodeString &second, |
| UBool doNormalize, |
| UErrorCode &errorCode) const { |
| uprv_checkCanGetBuffer(first, errorCode); |
| if(U_FAILURE(errorCode)) { |
| return first; |
| } |
| const UChar *secondArray=second.getBuffer(); |
| if(&first==&second || secondArray==NULL) { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return first; |
| } |
| int32_t firstLength=first.length(); |
| UnicodeString safeMiddle; |
| { |
| ReorderingBuffer buffer(impl, first); |
| if(buffer.init(firstLength+second.length(), errorCode)) { |
| normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, |
| safeMiddle, buffer, errorCode); |
| } |
| } // The ReorderingBuffer destructor finalizes the first string. |
| if(U_FAILURE(errorCode)) { |
| // Restore the modified suffix of the first string. |
| first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); |
| } |
| return first; |
| } |
| virtual void |
| normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| UnicodeString &safeMiddle, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; |
| virtual UBool |
| getDecomposition(UChar32 c, UnicodeString &decomposition) const { |
| UChar buffer[4]; |
| int32_t length; |
| const UChar *d=impl.getDecomposition(c, buffer, length); |
| if(d==NULL) { |
| return false; |
| } |
| if(d==buffer) { |
| decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) |
| } else { |
| decomposition.setTo(false, d, length); // read-only alias |
| } |
| return true; |
| } |
| virtual UBool |
| getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { |
| UChar buffer[30]; |
| int32_t length; |
| const UChar *d=impl.getRawDecomposition(c, buffer, length); |
| if(d==NULL) { |
| return false; |
| } |
| if(d==buffer) { |
| decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) |
| } else { |
| decomposition.setTo(false, d, length); // read-only alias |
| } |
| return true; |
| } |
| virtual UChar32 |
| composePair(UChar32 a, UChar32 b) const { |
| return impl.composePair(a, b); |
| } |
| |
| virtual uint8_t |
| getCombiningClass(UChar32 c) const { |
| return impl.getCC(impl.getNorm16(c)); |
| } |
| |
| // quick checks |
| virtual UBool |
| isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { |
| if(U_FAILURE(errorCode)) { |
| return false; |
| } |
| const UChar *sArray=s.getBuffer(); |
| if(sArray==NULL) { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return false; |
| } |
| const UChar *sLimit=sArray+s.length(); |
| return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); |
| } |
| virtual UNormalizationCheckResult |
| quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { |
| return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; |
| } |
| virtual int32_t |
| spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { |
| if(U_FAILURE(errorCode)) { |
| return 0; |
| } |
| const UChar *sArray=s.getBuffer(); |
| if(sArray==NULL) { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return 0; |
| } |
| return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); |
| } |
| virtual const UChar * |
| spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; |
| |
| virtual UNormalizationCheckResult getQuickCheck(UChar32) const { |
| return UNORM_YES; |
| } |
| |
| const Normalizer2Impl &impl; |
| }; |
| |
| class DecomposeNormalizer2 : public Normalizer2WithImpl { |
| public: |
| DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} |
| virtual ~DecomposeNormalizer2(); |
| |
| private: |
| virtual void |
| normalize(const UChar *src, const UChar *limit, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| impl.decompose(src, limit, &buffer, errorCode); |
| } |
| using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. |
| virtual void |
| normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| UnicodeString &safeMiddle, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); |
| } |
| virtual const UChar * |
| spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { |
| return impl.decompose(src, limit, NULL, errorCode); |
| } |
| using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. |
| virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { |
| return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; |
| } |
| virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); } |
| virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); } |
| virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } |
| }; |
| |
| class ComposeNormalizer2 : public Normalizer2WithImpl { |
| public: |
| ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : |
| Normalizer2WithImpl(ni), onlyContiguous(fcc) {} |
| virtual ~ComposeNormalizer2(); |
| |
| private: |
| virtual void |
| normalize(const UChar *src, const UChar *limit, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { |
| impl.compose(src, limit, onlyContiguous, true, buffer, errorCode); |
| } |
| using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. |
| |
| void |
| normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, |
| Edits *edits, UErrorCode &errorCode) const U_OVERRIDE { |
| if (U_FAILURE(errorCode)) { |
| return; |
| } |
| if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { |
| edits->reset(); |
| } |
| const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data()); |
| impl.composeUTF8(options, onlyContiguous, s, s + src.length(), |
| &sink, edits, errorCode); |
| sink.Flush(); |
| } |
| |
| virtual void |
| normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| UnicodeString &safeMiddle, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { |
| impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); |
| } |
| |
| virtual UBool |
| isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { |
| if(U_FAILURE(errorCode)) { |
| return false; |
| } |
| const UChar *sArray=s.getBuffer(); |
| if(sArray==NULL) { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return false; |
| } |
| UnicodeString temp; |
| ReorderingBuffer buffer(impl, temp); |
| if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization |
| return false; |
| } |
| return impl.compose(sArray, sArray+s.length(), onlyContiguous, false, buffer, errorCode); |
| } |
| virtual UBool |
| isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE { |
| if(U_FAILURE(errorCode)) { |
| return false; |
| } |
| const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data()); |
| return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode); |
| } |
| virtual UNormalizationCheckResult |
| quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { |
| if(U_FAILURE(errorCode)) { |
| return UNORM_MAYBE; |
| } |
| const UChar *sArray=s.getBuffer(); |
| if(sArray==NULL) { |
| errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| return UNORM_MAYBE; |
| } |
| UNormalizationCheckResult qcResult=UNORM_YES; |
| impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); |
| return qcResult; |
| } |
| virtual const UChar * |
| spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE { |
| return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); |
| } |
| using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. |
| virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE { |
| return impl.getCompQuickCheck(impl.getNorm16(c)); |
| } |
| virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE { |
| return impl.hasCompBoundaryBefore(c); |
| } |
| virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE { |
| return impl.hasCompBoundaryAfter(c, onlyContiguous); |
| } |
| virtual UBool isInert(UChar32 c) const U_OVERRIDE { |
| return impl.isCompInert(c, onlyContiguous); |
| } |
| |
| const UBool onlyContiguous; |
| }; |
| |
| class FCDNormalizer2 : public Normalizer2WithImpl { |
| public: |
| FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} |
| virtual ~FCDNormalizer2(); |
| |
| private: |
| virtual void |
| normalize(const UChar *src, const UChar *limit, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| impl.makeFCD(src, limit, &buffer, errorCode); |
| } |
| using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. |
| virtual void |
| normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| UnicodeString &safeMiddle, |
| ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); |
| } |
| virtual const UChar * |
| spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { |
| return impl.makeFCD(src, limit, NULL, errorCode); |
| } |
| using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. |
| virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } |
| virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } |
| virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } |
| }; |
| |
| struct Norm2AllModes : public UMemory { |
| Norm2AllModes(Normalizer2Impl *i) |
| : impl(i), comp(*i, false), decomp(*i), fcd(*i), fcc(*i, true) {} |
| ~Norm2AllModes(); |
| |
| static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode); |
| static Norm2AllModes *createNFCInstance(UErrorCode &errorCode); |
| static Norm2AllModes *createInstance(const char *packageName, |
| const char *name, |
| UErrorCode &errorCode); |
| |
| static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode); |
| static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode); |
| static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode); |
| |
| Normalizer2Impl *impl; |
| ComposeNormalizer2 comp; |
| DecomposeNormalizer2 decomp; |
| FCDNormalizer2 fcd; |
| ComposeNormalizer2 fcc; |
| }; |
| |
| U_NAMESPACE_END |
| |
| #endif // !UCONFIG_NO_NORMALIZATION |
| #endif // __NORM2ALLMODES_H__ |