third_party/icu/source/i18n/brktrans.cpp - cobalt - Git at Google

 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
 /*
 **********************************************************************
 *   Copyright (C) 2008-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   05/11/2008  Andy Heninger  Port from Java
 **********************************************************************
 */

 #include <utility>

 #include "unicode/utypes.h"

 #if  !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION

 #if defined(STARBOARD)
 #include "starboard/client_porting/poem/assert_poem.h"
 #include "starboard/client_porting/poem/string_poem.h"
 #endif  // defined(STARBOARD)
 #include "unicode/brkiter.h"
 #include "unicode/localpointer.h"
 #include "unicode/uchar.h"
 #include "unicode/unifilt.h"
 #include "unicode/uniset.h"

 #include "brktrans.h"
 #include "cmemory.h"
 #include "mutex.h"
 #include "uprops.h"
 #include "uinvchar.h"
 #include "util.h"
 #include "uvectr32.h"

 U_NAMESPACE_BEGIN

 // ICU RTTI boilerplate for BreakTransliterator.
 // NOTE(review): presumably expands to the class-ID accessor definitions
 // declared for this class -- confirm against unicode/uobject.h.
 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)

 // Code unit 32 (U+0020, ' '); the constructor uses it as the initial value
 // of fInsertion.
 static const UChar SPACE       = 32;  // ' '


 /**
  * Constructs a BreakTransliterator registered under the ID
  * "Any-BreakInternal". The text to insert starts out as a single space, and
  * the cached break iterator and boundary vector start out null.
  *
  * @param adoptedFilter  filter passed through to the Transliterator base
  *                       class constructor
  */
 BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter)
     : Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),
       cachedBI(nullptr),
       cachedBoundaries(nullptr),
       fInsertion(SPACE) {
 }


 /**
  * Destructor. Performs no explicit work of its own; the members clean up
  * through their own destructors.
  */
 BreakTransliterator::~BreakTransliterator() = default;

 /**
  * Copy constructor. Copies the base-class state and the insertion string.
  * The cached break iterator and boundary vector are NOT copied; the new
  * object starts with both caches null.
  *
  * @param o  the transliterator to copy
  */
 BreakTransliterator::BreakTransliterator(const BreakTransliterator& o)
     : Transliterator(o),
       cachedBI(nullptr),
       cachedBoundaries(nullptr),
       fInsertion(o.fInsertion) {
 }


 /**
  * Transliterator API.
  *
  * @return a new BreakTransliterator allocated with `new` and built with the
  *         copy constructor from this object
  */
 BreakTransliterator* BreakTransliterator::clone() const {
     BreakTransliterator* duplicate = new BreakTransliterator(*this);
     return duplicate;
 }

 /**
  * Implements {@link Transliterator#handleTransliterate}.
  *
  * Walks the word boundaries that lie before offsets.limit, keeps those that
  * have a letter or mark (general category L* or M*) on both sides, and
  * inserts fInsertion at each kept boundary. The offsets are then shifted by
  * the total length of the inserted text.
  *
  * @param text           text to modify in place
  * @param offsets        positions for this operation; contextLimit, limit
  *                       and start are rewritten before returning
  * @param isIncremental  when true, start becomes the last kept boundary plus
  *                       the inserted length (0 if no boundary was kept);
  *                       when false, start becomes the updated limit
  */
 void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                               UBool isIncremental ) const {
     UErrorCode status = U_ZERO_ERROR;
     LocalPointer<BreakIterator> wordBreaker;
     LocalPointer<UVector32> pending;

     // The caches are updated from this const method, hence the const_cast.
     BreakTransliterator *self = const_cast<BreakTransliterator *>(this);

     // Take whatever is cached; the Mutex object guards this scope.
     {
         Mutex guard;
         pending = std::move(self->cachedBoundaries);
         wordBreaker = std::move(self->cachedBI);
     }

     // Nothing was cached: build fresh instances.
     if (wordBreaker.isNull()) {
         wordBreaker.adoptInstead(BreakIterator::createWordInstance(Locale::getEnglish(), status));
     }
     if (pending.isNull()) {
         pending.adoptInstead(new UVector32(status));
     }

     // Bail out without touching text or offsets if setup did not succeed.
     if (U_FAILURE(status) || wordBreaker.isNull() || pending.isNull()) {
         return;
     }

     pending->removeAllElements();
     UnicodeString sText = replaceableAsString(text);
     wordBreaker->setText(sText);
     wordBreaker->preceding(offsets.start);

     // Collect the qualifying boundaries first and insert afterwards. Few are
     // expected, since the transliterator is normally filtered.
     const uint32_t letterOrMark = U_GC_L_MASK | U_GC_M_MASK;
     for (int32_t pos = wordBreaker->next();
          pos != UBRK_DONE && pos < offsets.limit;
          pos = wordBreaker->next()) {
         if (pos == 0) {
             continue;
         }

         // HACK: the character preceding the boundary must be a letter or mark...
         const int typeBefore = u_charType(sText.char32At(pos - 1));
         if ((U_MASK(typeBefore) & letterOrMark) == 0) {
             continue;
         }

         // ...and so must the character following it.
         const int typeAfter = u_charType(sText.char32At(pos));
         if ((U_MASK(typeAfter) & letterOrMark) == 0) {
             continue;
         }

         pending->addElement(pos, status);
     }

     int delta = 0;
     int lastBoundary = 0;

     const int32_t found = pending->size();
     if (found != 0) {
         delta = found * fInsertion.length();
         lastBoundary = pending->lastElementi();

         // Insert from the highest index down so that the indices still
         // waiting on the stack remain valid.
         while (pending->size() > 0) {
             const int32_t at = pending->popi();
             text.handleReplaceBetween(at, at, fInsertion);
         }
     }

     // Account for the inserted text in the returned positions.
     offsets.contextLimit += delta;
     offsets.limit += delta;
     if (isIncremental) {
         offsets.start = lastBoundary + delta;
     } else {
         offsets.start = offsets.limit;
     }

     // Hand the break iterator and vector back to the cache, unless a slot
     // has been filled in the meantime.
     {
         Mutex guard;
         if (self->cachedBI.isNull()) {
             self->cachedBI = std::move(wordBreaker);
         }
         if (self->cachedBoundaries.isNull()) {
             self->cachedBoundaries = std::move(pending);
         }
     }

     // TODO:  do something with U_FAILURE(status);
     //        (need to look at transliterators overall, not just here.)
 }

 //
 //  getInsertion()   Returns a reference to the string that
 //                   handleTransliterate() inserts at each kept boundary.
 //
 const UnicodeString &BreakTransliterator::getInsertion() const {
     return this->fInsertion;
 }

 //
 //  setInsertion()
 //
 void BreakTransliterator::setInsertion(const UnicodeString &insertion) {
     this->fInsertion = insertion;
 }

 //
 //   replaceableAsString   Hack that lets break iterators operate on the
 //                         Replaceable text handed to transliterators.
 //                         Returns the whole content of r as a UnicodeString.
 //                         When r actually is a UnicodeString it is copied
 //                         directly; otherwise the content is pulled out
 //                         with extractBetween(). In practice UnicodeString
 //                         is the only Replaceable type expected here, so
 //                         the direct path is the normal one.
 //
 UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {
     if (const UnicodeString *asString = dynamic_cast<UnicodeString *>(&r)) {
         return *asString;
     }
     UnicodeString extracted;
     r.extractBetween(0, r.length(), extracted);
     return extracted;
 }

 U_NAMESPACE_END

 #endif /* #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION */
	// © 2016 and later: Unicode, Inc. and others.
	// License & terms of use: http://www.unicode.org/copyright.html
	/*
	**********************************************************************
	* Copyright (C) 2008-2015, International Business Machines
	* Corporation and others. All Rights Reserved.
	**********************************************************************
	* Date Name Description
	* 05/11/2008 Andy Heninger Port from Java
	**********************************************************************
	*/

	#include <utility>

	#include "unicode/utypes.h"

	#if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION

	#if defined(STARBOARD)
	#include "starboard/client_porting/poem/assert_poem.h"
	#include "starboard/client_porting/poem/string_poem.h"
	#endif // defined(STARBOARD)
	#include "unicode/brkiter.h"
	#include "unicode/localpointer.h"
	#include "unicode/uchar.h"
	#include "unicode/unifilt.h"
	#include "unicode/uniset.h"

	#include "brktrans.h"
	#include "cmemory.h"
	#include "mutex.h"
	#include "uprops.h"
	#include "uinvchar.h"
	#include "util.h"
	#include "uvectr32.h"

	U_NAMESPACE_BEGIN

	// ICU RTTI boilerplate for BreakTransliterator.
	// NOTE(review): presumably expands to the class-ID accessor definitions
	// declared for this class -- confirm against unicode/uobject.h.
	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)

	// Code unit 32 (U+0020, ' '); the constructor uses it as the initial value
	// of fInsertion.
	static const UChar SPACE = 32; // ' '


	/**
	 * Constructs a BreakTransliterator registered under the ID
	 * "Any-BreakInternal". The text to insert starts out as a single space,
	 * and the cached break iterator and boundary vector start out null.
	 *
	 * @param adoptedFilter  filter passed through to the Transliterator base
	 *                       class constructor
	 */
	BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter)
	    : Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),
	      cachedBI(nullptr),
	      cachedBoundaries(nullptr),
	      fInsertion(SPACE) {
	}


	/**
	 * Destructor. Performs no explicit work of its own; the members clean up
	 * through their own destructors.
	 */
	BreakTransliterator::~BreakTransliterator() = default;

	/**
	 * Copy constructor. Copies the base-class state and the insertion string.
	 * The cached break iterator and boundary vector are NOT copied; the new
	 * object starts with both caches null.
	 *
	 * @param o  the transliterator to copy
	 */
	BreakTransliterator::BreakTransliterator(const BreakTransliterator& o)
	    : Transliterator(o),
	      cachedBI(nullptr),
	      cachedBoundaries(nullptr),
	      fInsertion(o.fInsertion) {
	}


	/**
	 * Transliterator API.
	 *
	 * @return a new BreakTransliterator allocated with `new` and built with
	 *         the copy constructor from this object
	 */
	BreakTransliterator* BreakTransliterator::clone() const {
	    BreakTransliterator* duplicate = new BreakTransliterator(*this);
	    return duplicate;
	}

	/**
	 * Implements {@link Transliterator#handleTransliterate}.
	 *
	 * Walks the word boundaries that lie before offsets.limit, keeps those
	 * that have a letter or mark (general category L* or M*) on both sides,
	 * and inserts fInsertion at each kept boundary. The offsets are then
	 * shifted by the total length of the inserted text.
	 *
	 * @param text           text to modify in place
	 * @param offsets        positions for this operation; contextLimit, limit
	 *                       and start are rewritten before returning
	 * @param isIncremental  when true, start becomes the last kept boundary
	 *                       plus the inserted length (0 if no boundary was
	 *                       kept); when false, start becomes the updated limit
	 */
	void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
	                                              UBool isIncremental ) const {

	    UErrorCode status = U_ZERO_ERROR;
	    LocalPointer<BreakIterator> bi;
	    LocalPointer<UVector32> boundaries;

	    // Take whatever is cached; the Mutex object guards this scope. The
	    // caches are updated from this const method, hence the const_cast.
	    {
	        Mutex m;
	        BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this);
	        boundaries = std::move(nonConstThis->cachedBoundaries);
	        bi = std::move(nonConstThis->cachedBI);
	    }
	    // Nothing was cached: build fresh instances.
	    if (bi.isNull()) {
	        bi.adoptInstead(BreakIterator::createWordInstance(Locale::getEnglish(), status));
	    }
	    if (boundaries.isNull()) {
	        boundaries.adoptInstead(new UVector32(status));
	    }

	    // Bail out without touching text or offsets if setup did not succeed.
	    if (bi.isNull() || boundaries.isNull() || U_FAILURE(status)) {
	        return;
	    }

	    boundaries->removeAllElements();
	    UnicodeString sText = replaceableAsString(text);
	    bi->setText(sText);
	    bi->preceding(offsets.start);

	    // To make things much easier, we will stack the boundaries, and then insert at the end.
	    // Generally, we won't need too many, since we will be filtered.

	    int32_t boundary;
	    for (boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) {
	        if (boundary == 0) continue;

	        // HACK: check that the preceding character is a letter or mark...
	        UChar32 cp = sText.char32At(boundary - 1);
	        int type = u_charType(cp);
	        if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;

	        // ...and that the following character is one as well.
	        cp = sText.char32At(boundary);
	        type = u_charType(cp);
	        if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;

	        boundaries->addElement(boundary, status);
	    }

	    int delta = 0;
	    int lastBoundary = 0;

	    if (boundaries->size() != 0) { // if we found something, adjust
	        delta = boundaries->size() * fInsertion.length();
	        lastBoundary = boundaries->lastElementi();

	        // We do this from the end backwards, so that we don't have to keep updating.
	        while (boundaries->size() > 0) {
	            boundary = boundaries->popi();
	            text.handleReplaceBetween(boundary, boundary, fInsertion);
	        }
	    }

	    // Now fix up the return values.
	    offsets.contextLimit += delta;
	    offsets.limit += delta;
	    offsets.start = isIncremental ? lastBoundary + delta : offsets.limit;

	    // Return break iterator & boundaries vector to the cache, unless a
	    // slot has been filled in the meantime.
	    {
	        Mutex m;
	        BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this);
	        if (nonConstThis->cachedBI.isNull()) {
	            nonConstThis->cachedBI = std::move(bi);
	        }
	        if (nonConstThis->cachedBoundaries.isNull()) {
	            nonConstThis->cachedBoundaries = std::move(boundaries);
	        }
	    }

	    // TODO: do something with U_FAILURE(status);
	    // (need to look at transliterators overall, not just here.)
	}

	//
	// getInsertion()   Returns a reference to the string that
	//                  handleTransliterate() inserts at each kept boundary.
	//
	const UnicodeString &BreakTransliterator::getInsertion() const {
	    return this->fInsertion;
	}

	//
	// setInsertion()
	//
	void BreakTransliterator::setInsertion(const UnicodeString &insertion) {
	this->fInsertion = insertion;
	}

	//
	// replaceableAsString   Hack to let break iterators work
	//                       on the replaceable text from transliterators.
	//                       Returns the whole content of r as a UnicodeString.
	//                       In practice, the only real Replaceable type that we
	//                       will be seeing is UnicodeString, so this function
	//                       will normally be efficient.
	//
	UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {
	    UnicodeString s;
	    // dynamic_cast to a pointer yields NULL (rather than throwing) when r
	    // is not a UnicodeString, which selects the generic path below.
	    UnicodeString *rs = dynamic_cast<UnicodeString *>(&r);
	    if (rs != NULL) {
	        s = *rs;
	    } else {
	        r.extractBetween(0, r.length(), s);
	    }
	    return s;
	}

	U_NAMESPACE_END

	#endif /* #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION */