|  | /* | 
|  | ******************************************************************************* | 
|  | * Copyright (C) 1997-2015, International Business Machines Corporation and | 
|  | * others. All Rights Reserved. | 
|  | ******************************************************************************* | 
|  | * | 
|  | * File brkiter.cpp | 
|  | * | 
|  | * Modification History: | 
|  | * | 
|  | *   Date        Name        Description | 
|  | *   02/18/97    aliu        Converted from OpenClass.  Added DONE. | 
|  | *   01/13/2000  helena      Added UErrorCode parameter to createXXXInstance methods. | 
|  | ***************************************************************************************** | 
|  | */ | 
|  |  | 
|  | // ***************************************************************************** | 
|  | // This file was generated from the java source file BreakIterator.java | 
|  | // ***************************************************************************** | 
|  |  | 
|  | #include "unicode/ptypes.h" | 
|  | #include "unicode/utypes.h" | 
|  |  | 
|  | #if !UCONFIG_NO_BREAK_ITERATION | 
|  |  | 
|  | #include "starboard/client_porting/poem/assert_poem.h" | 
|  | #include "starboard/client_porting/poem/string_poem.h" | 
|  | #include "unicode/rbbi.h" | 
|  | #include "unicode/brkiter.h" | 
|  | #include "unicode/udata.h" | 
|  | #include "unicode/ures.h" | 
|  | #include "unicode/ustring.h" | 
|  | #include "unicode/filteredbrk.h" | 
|  | #include "ucln_cmn.h" | 
|  | #include "cstring.h" | 
|  | #include "umutex.h" | 
|  | #include "servloc.h" | 
|  | #include "locbased.h" | 
|  | #include "uresimp.h" | 
|  | #include "uassert.h" | 
|  | #include "ubrkimpl.h" | 
|  | #include "charstr.h" | 
|  |  | 
|  | // ***************************************************************************** | 
|  | // class BreakIterator | 
|  | // This class implements methods for finding the location of boundaries in text. | 
|  | // Instances of BreakIterator maintain a current position and scan over text | 
|  | // returning the index of characters where boundaries occur. | 
|  | // ***************************************************************************** | 
|  |  | 
|  | U_NAMESPACE_BEGIN | 
|  |  | 
|  | // ------------------------------------- | 
|  |  | 
|  | BreakIterator* | 
|  | BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status) | 
|  | { | 
|  | char fnbuff[256]; | 
|  | char ext[4]={'\0'}; | 
|  | CharString actualLocale; | 
|  | int32_t size; | 
|  | const UChar* brkfname = NULL; | 
|  | UResourceBundle brkRulesStack; | 
|  | UResourceBundle brkNameStack; | 
|  | UResourceBundle *brkRules = &brkRulesStack; | 
|  | UResourceBundle *brkName  = &brkNameStack; | 
|  | RuleBasedBreakIterator *result = NULL; | 
|  |  | 
|  | if (U_FAILURE(status)) | 
|  | return NULL; | 
|  |  | 
|  | ures_initStackObject(brkRules); | 
|  | ures_initStackObject(brkName); | 
|  |  | 
|  | // Get the locale | 
|  | UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status); | 
|  |  | 
|  | // Get the "boundaries" array. | 
|  | if (U_SUCCESS(status)) { | 
|  | brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status); | 
|  | // Get the string object naming the rules file | 
|  | brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status); | 
|  | // Get the actual string | 
|  | brkfname = ures_getString(brkName, &size, &status); | 
|  | U_ASSERT((size_t)size<sizeof(fnbuff)); | 
|  | if ((size_t)size>=sizeof(fnbuff)) { | 
|  | size=0; | 
|  | if (U_SUCCESS(status)) { | 
|  | status = U_BUFFER_OVERFLOW_ERROR; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Use the string if we found it | 
|  | if (U_SUCCESS(status) && brkfname) { | 
|  | actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status); | 
|  |  | 
|  | UChar* extStart=u_strchr(brkfname, 0x002e); | 
|  | int len = 0; | 
|  | if(extStart!=NULL){ | 
|  | len = (int)(extStart-brkfname); | 
|  | u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff | 
|  | u_UCharsToChars(brkfname, fnbuff, len); | 
|  | } | 
|  | fnbuff[len]=0; // nul terminate | 
|  | } | 
|  | } | 
|  |  | 
|  | ures_close(brkRules); | 
|  | ures_close(brkName); | 
|  |  | 
|  | UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); | 
|  | if (U_FAILURE(status)) { | 
|  | ures_close(b); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | // Create a RuleBasedBreakIterator | 
|  | result = new RuleBasedBreakIterator(file, status); | 
|  |  | 
|  | // If there is a result, set the valid locale and actual locale, and the kind | 
|  | if (U_SUCCESS(status) && result != NULL) { | 
|  | U_LOCALE_BASED(locBased, *(BreakIterator*)result); | 
|  | locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), | 
|  | actualLocale.data()); | 
|  | result->setBreakType(kind); | 
|  | } | 
|  |  | 
|  | ures_close(b); | 
|  |  | 
|  | if (U_FAILURE(status) && result != NULL) {  // Sometimes redundant check, but simple | 
|  | delete result; | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | if (result == NULL) { | 
|  | udata_close(file); | 
|  | if (U_SUCCESS(status)) { | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | } | 
|  | } | 
|  |  | 
|  | return result; | 
|  | } | 
|  |  | 
|  | // Creates a break iterator for word breaks. | 
|  | BreakIterator* U_EXPORT2 | 
|  | BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) | 
|  | { | 
|  | return createInstance(key, UBRK_WORD, status); | 
|  | } | 
|  |  | 
|  | // ------------------------------------- | 
|  |  | 
|  | // Creates a break iterator  for line breaks. | 
|  | BreakIterator* U_EXPORT2 | 
|  | BreakIterator::createLineInstance(const Locale& key, UErrorCode& status) | 
|  | { | 
|  | return createInstance(key, UBRK_LINE, status); | 
|  | } | 
|  |  | 
|  | // ------------------------------------- | 
|  |  | 
|  | // Creates a break iterator  for character breaks. | 
|  | BreakIterator* U_EXPORT2 | 
|  | BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status) | 
|  | { | 
|  | return createInstance(key, UBRK_CHARACTER, status); | 
|  | } | 
|  |  | 
|  | // ------------------------------------- | 
|  |  | 
|  | // Creates a break iterator  for sentence breaks. | 
|  | BreakIterator* U_EXPORT2 | 
|  | BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status) | 
|  | { | 
|  | return createInstance(key, UBRK_SENTENCE, status); | 
|  | } | 
|  |  | 
|  | // ------------------------------------- | 
|  |  | 
|  | // Creates a break iterator for title casing breaks. | 
|  | BreakIterator* U_EXPORT2 | 
|  | BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) | 
|  | { | 
|  | return createInstance(key, UBRK_TITLE, status); | 
|  | } | 
|  |  | 
|  | // ------------------------------------- | 
|  |  | 
|  | // Gets all the available locales that has localized text boundary data. | 
|  | const Locale* U_EXPORT2 | 
|  | BreakIterator::getAvailableLocales(int32_t& count) | 
|  | { | 
|  | return Locale::getAvailableLocales(count); | 
|  | } | 
|  |  | 
|  | // ------------------------------------------ | 
|  | // | 
|  | // Default constructor and destructor | 
|  | // | 
|  | //------------------------------------------- | 
|  |  | 
|  | BreakIterator::BreakIterator() | 
|  | { | 
|  | *validLocale = *actualLocale = 0; | 
|  | } | 
|  |  | 
|  | BreakIterator::~BreakIterator() | 
|  | { | 
|  | } | 
|  |  | 
|  | // ------------------------------------------ | 
|  | // | 
|  | // Registration | 
|  | // | 
|  | //------------------------------------------- | 
|  | #if !UCONFIG_NO_SERVICE | 
|  |  | 
|  | // ------------------------------------- | 
|  |  | 
|  | class ICUBreakIteratorFactory : public ICUResourceBundleFactory { | 
|  | public: | 
|  | virtual ~ICUBreakIteratorFactory(); | 
|  | protected: | 
|  | virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { | 
|  | return BreakIterator::makeInstance(loc, kind, status); | 
|  | } | 
|  | }; | 
|  |  | 
|  | ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} | 
|  |  | 
|  | // ------------------------------------- | 
|  |  | 
|  | class ICUBreakIteratorService : public ICULocaleService { | 
|  | public: | 
|  | ICUBreakIteratorService() | 
|  | : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) | 
|  | { | 
|  | UErrorCode status = U_ZERO_ERROR; | 
|  | registerFactory(new ICUBreakIteratorFactory(), status); | 
|  | } | 
|  |  | 
|  | virtual ~ICUBreakIteratorService(); | 
|  |  | 
|  | virtual UObject* cloneInstance(UObject* instance) const { | 
|  | return ((BreakIterator*)instance)->clone(); | 
|  | } | 
|  |  | 
|  | virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { | 
|  | LocaleKey& lkey = (LocaleKey&)key; | 
|  | int32_t kind = lkey.kind(); | 
|  | Locale loc; | 
|  | lkey.currentLocale(loc); | 
|  | return BreakIterator::makeInstance(loc, kind, status); | 
|  | } | 
|  |  | 
|  | virtual UBool isDefault() const { | 
|  | return countFactories() == 1; | 
|  | } | 
|  | }; | 
|  |  | 
|  | ICUBreakIteratorService::~ICUBreakIteratorService() {} | 
|  |  | 
|  | // ------------------------------------- | 
|  |  | 
|  | // defined in ucln_cmn.h | 
|  | U_NAMESPACE_END | 
|  |  | 
|  | static icu::UInitOnce gInitOnce; | 
|  | static icu::ICULocaleService* gService = NULL; | 
|  |  | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Release all static memory held by breakiterator. | 
|  | */ | 
|  | U_CDECL_BEGIN | 
|  | static UBool U_CALLCONV breakiterator_cleanup(void) { | 
|  | #if !UCONFIG_NO_SERVICE | 
|  | if (gService) { | 
|  | delete gService; | 
|  | gService = NULL; | 
|  | } | 
|  | gInitOnce.reset(); | 
|  | #endif | 
|  | return TRUE; | 
|  | } | 
|  | U_CDECL_END | 
|  | U_NAMESPACE_BEGIN | 
|  |  | 
|  | static void U_CALLCONV | 
|  | initService(void) { | 
|  | gService = new ICUBreakIteratorService(); | 
|  | ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); | 
|  | } | 
|  |  | 
|  | static ICULocaleService* | 
|  | getService(void) | 
|  | { | 
|  | umtx_initOnce(gInitOnce, &initService); | 
|  | return gService; | 
|  | } | 
|  |  | 
|  |  | 
|  | // ------------------------------------- | 
|  |  | 
|  | static inline UBool | 
|  | hasService(void) | 
|  | { | 
|  | return !gInitOnce.isReset() && getService() != NULL; | 
|  | } | 
|  |  | 
|  | // ------------------------------------- | 
|  |  | 
|  | URegistryKey U_EXPORT2 | 
|  | BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) | 
|  | { | 
|  | ICULocaleService *service = getService(); | 
|  | if (service == NULL) { | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | return NULL; | 
|  | } | 
|  | return service->registerInstance(toAdopt, locale, kind, status); | 
|  | } | 
|  |  | 
|  | // ------------------------------------- | 
|  |  | 
|  | UBool U_EXPORT2 | 
|  | BreakIterator::unregister(URegistryKey key, UErrorCode& status) | 
|  | { | 
|  | if (U_SUCCESS(status)) { | 
|  | if (hasService()) { | 
|  | return gService->unregister(key, status); | 
|  | } | 
|  | status = U_MEMORY_ALLOCATION_ERROR; | 
|  | } | 
|  | return FALSE; | 
|  | } | 
|  |  | 
|  | // ------------------------------------- | 
|  |  | 
|  | StringEnumeration* U_EXPORT2 | 
|  | BreakIterator::getAvailableLocales(void) | 
|  | { | 
|  | ICULocaleService *service = getService(); | 
|  | if (service == NULL) { | 
|  | return NULL; | 
|  | } | 
|  | return service->getAvailableLocales(); | 
|  | } | 
|  | #endif /* UCONFIG_NO_SERVICE */ | 
|  |  | 
|  | // ------------------------------------- | 
|  |  | 
|  | BreakIterator* | 
|  | BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status) | 
|  | { | 
|  | if (U_FAILURE(status)) { | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | #if !UCONFIG_NO_SERVICE | 
|  | if (hasService()) { | 
|  | Locale actualLoc(""); | 
|  | BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status); | 
|  | // TODO: The way the service code works in ICU 2.8 is that if | 
|  | // there is a real registered break iterator, the actualLoc | 
|  | // will be populated, but if the handleDefault path is taken | 
|  | // (because nothing is registered that can handle the | 
|  | // requested locale) then the actualLoc comes back empty.  In | 
|  | // that case, the returned object already has its actual/valid | 
|  | // locale data populated (by makeInstance, which is what | 
|  | // handleDefault calls), so we don't touch it.  YES, A COMMENT | 
|  | // THIS LONG is a sign of bad code -- so the action item is to | 
|  | // revisit this in ICU 3.0 and clean it up/fix it/remove it. | 
|  | if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) { | 
|  | U_LOCALE_BASED(locBased, *result); | 
|  | locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); | 
|  | } | 
|  | return result; | 
|  | } | 
|  | else | 
|  | #endif | 
|  | { | 
|  | return makeInstance(loc, kind, status); | 
|  | } | 
|  | } | 
|  |  | 
|  | // ------------------------------------- | 
|  | enum { kKeyValueLenMax = 32 }; | 
|  |  | 
|  | BreakIterator* | 
|  | BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) | 
|  | { | 
|  |  | 
|  | if (U_FAILURE(status)) { | 
|  | return NULL; | 
|  | } | 
|  | char lbType[kKeyValueLenMax]; | 
|  |  | 
|  | BreakIterator *result = NULL; | 
|  | switch (kind) { | 
|  | case UBRK_CHARACTER: | 
|  | result = BreakIterator::buildInstance(loc, "grapheme", kind, status); | 
|  | break; | 
|  | case UBRK_WORD: | 
|  | result = BreakIterator::buildInstance(loc, "word", kind, status); | 
|  | break; | 
|  | case UBRK_LINE: | 
|  | uprv_strcpy(lbType, "line"); | 
|  | { | 
|  | char lbKeyValue[kKeyValueLenMax] = {0}; | 
|  | UErrorCode kvStatus = U_ZERO_ERROR; | 
|  | int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus); | 
|  | if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) { | 
|  | uprv_strcat(lbType, "_"); | 
|  | uprv_strcat(lbType, lbKeyValue); | 
|  | } | 
|  | } | 
|  | result = BreakIterator::buildInstance(loc, lbType, kind, status); | 
|  | break; | 
|  | case UBRK_SENTENCE: | 
|  | result = BreakIterator::buildInstance(loc, "sentence", kind, status); | 
|  | { | 
|  | char ssKeyValue[kKeyValueLenMax] = {0}; | 
|  | UErrorCode kvStatus = U_ZERO_ERROR; | 
|  | int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus); | 
|  | if (U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(ssKeyValue,"standard")==0) { | 
|  | FilteredBreakIteratorBuilder* fbiBuilder = FilteredBreakIteratorBuilder::createInstance(loc, kvStatus); | 
|  | if (U_SUCCESS(kvStatus)) { | 
|  | result = fbiBuilder->build(result, status); | 
|  | delete fbiBuilder; | 
|  | } | 
|  | } | 
|  | } | 
|  | break; | 
|  | case UBRK_TITLE: | 
|  | result = BreakIterator::buildInstance(loc, "title", kind, status); | 
|  | break; | 
|  | default: | 
|  | status = U_ILLEGAL_ARGUMENT_ERROR; | 
|  | } | 
|  |  | 
|  | if (U_FAILURE(status)) { | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | return result; | 
|  | } | 
|  |  | 
|  | Locale | 
|  | BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { | 
|  | U_LOCALE_BASED(locBased, *this); | 
|  | return locBased.getLocale(type, status); | 
|  | } | 
|  |  | 
|  | const char * | 
|  | BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { | 
|  | U_LOCALE_BASED(locBased, *this); | 
|  | return locBased.getLocaleID(type, status); | 
|  | } | 
|  |  | 
|  |  | 
|  | // This implementation of getRuleStatus is a do-nothing stub, here to | 
|  | // provide a default implementation for any derived BreakIterator classes that | 
|  | // do not implement it themselves. | 
|  | int32_t BreakIterator::getRuleStatus() const { | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | // This implementation of getRuleStatusVec is a do-nothing stub, here to | 
|  | // provide a default implementation for any derived BreakIterator classes that | 
|  | // do not implement it themselves. | 
|  | int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) { | 
|  | if (U_FAILURE(status)) { | 
|  | return 0; | 
|  | } | 
|  | if (capacity < 1) { | 
|  | status = U_BUFFER_OVERFLOW_ERROR; | 
|  | return 1; | 
|  | } | 
|  | *fillInVec = 0; | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) { | 
|  | U_LOCALE_BASED(locBased, (*this)); | 
|  | locBased.setLocaleIDs(valid, actual); | 
|  | } | 
|  |  | 
|  | U_NAMESPACE_END | 
|  |  | 
|  | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 
|  |  | 
|  | //eof |