| /* |
| ********************************************************************** |
| * Copyright (C) 2001-2008 IBM and others. All rights reserved. |
| ********************************************************************** |
| * Date Name Description |
| * 03/22/2000 helena Creation. |
| ********************************************************************** |
| */ |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION |
| |
| #include "unicode/stsearch.h" |
| #include "usrchimp.h" |
| #include "cmemory.h" |
| |
| U_NAMESPACE_BEGIN |
| |
| UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch) |
| |
| // public constructors and destructors ----------------------------------- |
| |
| StringSearch::StringSearch(const UnicodeString &pattern, |
| const UnicodeString &text, |
| const Locale &locale, |
| BreakIterator *breakiter, |
| UErrorCode &status) : |
| SearchIterator(text, breakiter), |
| m_collator_(), |
| m_pattern_(pattern) |
| { |
| if (U_FAILURE(status)) { |
| m_strsrch_ = NULL; |
| return; |
| } |
| |
| m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), |
| m_text_.getBuffer(), m_text_.length(), |
| locale.getName(), (UBreakIterator *)breakiter, |
| &status); |
| uprv_free(m_search_); |
| m_search_ = NULL; |
| |
| // !!! dlf m_collator_ is an odd beast. basically it is an aliasing |
| // wrapper around the internal collator and rules, which (here) are |
| // owned by this stringsearch object. this means 1) it's destructor |
| // _should not_ delete the ucollator or rules, and 2) changes made |
| // to the exposed collator (setStrength etc) _should_ modify the |
| // ucollator. thus the collator is not a copy-on-write alias, and it |
| // needs to distinguish itself not merely from 'stand alone' colators |
| // but also from copy-on-write ones. it needs additional state, which |
| // setUCollator should set. |
| |
| if (U_SUCCESS(status)) { |
| // Alias the collator |
| m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
| // m_search_ has been created by the base SearchIterator class |
| m_search_ = m_strsrch_->search; |
| } |
| } |
| |
| StringSearch::StringSearch(const UnicodeString &pattern, |
| const UnicodeString &text, |
| RuleBasedCollator *coll, |
| BreakIterator *breakiter, |
| UErrorCode &status) : |
| SearchIterator(text, breakiter), |
| m_collator_(), |
| m_pattern_(pattern) |
| { |
| if (U_FAILURE(status)) { |
| m_strsrch_ = NULL; |
| return; |
| } |
| if (coll == NULL) { |
| status = U_ILLEGAL_ARGUMENT_ERROR; |
| m_strsrch_ = NULL; |
| return; |
| } |
| m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
| m_pattern_.length(), |
| m_text_.getBuffer(), |
| m_text_.length(), coll->ucollator, |
| (UBreakIterator *)breakiter, |
| &status); |
| uprv_free(m_search_); |
| m_search_ = NULL; |
| |
| if (U_SUCCESS(status)) { |
| // Alias the collator |
| m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
| // m_search_ has been created by the base SearchIterator class |
| m_search_ = m_strsrch_->search; |
| } |
| } |
| |
| StringSearch::StringSearch(const UnicodeString &pattern, |
| CharacterIterator &text, |
| const Locale &locale, |
| BreakIterator *breakiter, |
| UErrorCode &status) : |
| SearchIterator(text, breakiter), |
| m_collator_(), |
| m_pattern_(pattern) |
| { |
| if (U_FAILURE(status)) { |
| m_strsrch_ = NULL; |
| return; |
| } |
| m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), |
| m_text_.getBuffer(), m_text_.length(), |
| locale.getName(), (UBreakIterator *)breakiter, |
| &status); |
| uprv_free(m_search_); |
| m_search_ = NULL; |
| |
| if (U_SUCCESS(status)) { |
| // Alias the collator |
| m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
| // m_search_ has been created by the base SearchIterator class |
| m_search_ = m_strsrch_->search; |
| } |
| } |
| |
| StringSearch::StringSearch(const UnicodeString &pattern, |
| CharacterIterator &text, |
| RuleBasedCollator *coll, |
| BreakIterator *breakiter, |
| UErrorCode &status) : |
| SearchIterator(text, breakiter), |
| m_collator_(), |
| m_pattern_(pattern) |
| { |
| if (U_FAILURE(status)) { |
| m_strsrch_ = NULL; |
| return; |
| } |
| if (coll == NULL) { |
| status = U_ILLEGAL_ARGUMENT_ERROR; |
| m_strsrch_ = NULL; |
| return; |
| } |
| m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
| m_pattern_.length(), |
| m_text_.getBuffer(), |
| m_text_.length(), coll->ucollator, |
| (UBreakIterator *)breakiter, |
| &status); |
| uprv_free(m_search_); |
| m_search_ = NULL; |
| |
| if (U_SUCCESS(status)) { |
| // Alias the collator |
| m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
| // m_search_ has been created by the base SearchIterator class |
| m_search_ = m_strsrch_->search; |
| } |
| } |
| |
| StringSearch::StringSearch(const StringSearch &that) : |
| SearchIterator(that.m_text_, that.m_breakiterator_), |
| m_collator_(), |
| m_pattern_(that.m_pattern_) |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| |
| // Free m_search_ from the superclass |
| uprv_free(m_search_); |
| m_search_ = NULL; |
| |
| if (that.m_strsrch_ == NULL) { |
| // This was not a good copy |
| m_strsrch_ = NULL; |
| } |
| else { |
| // Make a deep copy |
| m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
| m_pattern_.length(), |
| m_text_.getBuffer(), |
| m_text_.length(), |
| that.m_strsrch_->collator, |
| (UBreakIterator *)that.m_breakiterator_, |
| &status); |
| if (U_SUCCESS(status)) { |
| // Alias the collator |
| m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
| // m_search_ has been created by the base SearchIterator class |
| m_search_ = m_strsrch_->search; |
| } |
| } |
| } |
| |
| StringSearch::~StringSearch() |
| { |
| if (m_strsrch_ != NULL) { |
| usearch_close(m_strsrch_); |
| m_search_ = NULL; |
| } |
| } |
| |
| StringSearch * |
| StringSearch::clone() const { |
| return new StringSearch(*this); |
| } |
| |
| // operator overloading --------------------------------------------- |
| StringSearch & StringSearch::operator=(const StringSearch &that) |
| { |
| if ((*this) != that) { |
| UErrorCode status = U_ZERO_ERROR; |
| m_text_ = that.m_text_; |
| m_breakiterator_ = that.m_breakiterator_; |
| m_pattern_ = that.m_pattern_; |
| // all m_search_ in the parent class is linked up with m_strsrch_ |
| usearch_close(m_strsrch_); |
| m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
| m_pattern_.length(), |
| m_text_.getBuffer(), |
| m_text_.length(), |
| that.m_strsrch_->collator, |
| NULL, &status); |
| // Check null pointer |
| if (m_strsrch_ != NULL) { |
| // Alias the collator |
| m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
| m_search_ = m_strsrch_->search; |
| } |
| } |
| return *this; |
| } |
| |
| UBool StringSearch::operator==(const SearchIterator &that) const |
| { |
| if (this == &that) { |
| return TRUE; |
| } |
| if (SearchIterator::operator ==(that)) { |
| StringSearch &thatsrch = (StringSearch &)that; |
| return (this->m_pattern_ == thatsrch.m_pattern_ && |
| this->m_strsrch_->collator == thatsrch.m_strsrch_->collator); |
| } |
| return FALSE; |
| } |
| |
| // public get and set methods ---------------------------------------- |
| |
| void StringSearch::setOffset(int32_t position, UErrorCode &status) |
| { |
| // status checked in usearch_setOffset |
| usearch_setOffset(m_strsrch_, position, &status); |
| } |
| |
| int32_t StringSearch::getOffset(void) const |
| { |
| return usearch_getOffset(m_strsrch_); |
| } |
| |
| void StringSearch::setText(const UnicodeString &text, UErrorCode &status) |
| { |
| if (U_SUCCESS(status)) { |
| m_text_ = text; |
| usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status); |
| } |
| } |
| |
| void StringSearch::setText(CharacterIterator &text, UErrorCode &status) |
| { |
| if (U_SUCCESS(status)) { |
| text.getText(m_text_); |
| usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status); |
| } |
| } |
| |
| RuleBasedCollator * StringSearch::getCollator() const |
| { |
| return (RuleBasedCollator *)&m_collator_; |
| } |
| |
| void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status) |
| { |
| if (U_SUCCESS(status)) { |
| usearch_setCollator(m_strsrch_, coll->getUCollator(), &status); |
| // Alias the collator |
| m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
| } |
| } |
| |
| void StringSearch::setPattern(const UnicodeString &pattern, |
| UErrorCode &status) |
| { |
| if (U_SUCCESS(status)) { |
| m_pattern_ = pattern; |
| usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(), |
| &status); |
| } |
| } |
| |
| const UnicodeString & StringSearch::getPattern() const |
| { |
| return m_pattern_; |
| } |
| |
| // public methods ---------------------------------------------------- |
| |
| void StringSearch::reset() |
| { |
| usearch_reset(m_strsrch_); |
| } |
| |
| SearchIterator * StringSearch::safeClone(void) const |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| StringSearch *result = new StringSearch(m_pattern_, m_text_, |
| (RuleBasedCollator *)&m_collator_, |
| m_breakiterator_, |
| status); |
| /* test for NULL */ |
| if (result == 0) { |
| status = U_MEMORY_ALLOCATION_ERROR; |
| return 0; |
| } |
| result->setOffset(getOffset(), status); |
| result->setMatchStart(m_strsrch_->search->matchedIndex); |
| result->setMatchLength(m_strsrch_->search->matchedLength); |
| if (U_FAILURE(status)) { |
| return NULL; |
| } |
| return result; |
| } |
| |
| // protected method ------------------------------------------------- |
| |
| int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) |
| { |
| // values passed here are already in the pre-shift position |
| if (U_SUCCESS(status)) { |
| if (m_strsrch_->pattern.CELength == 0) { |
| m_search_->matchedIndex = |
| m_search_->matchedIndex == USEARCH_DONE ? |
| getOffset() : m_search_->matchedIndex + 1; |
| m_search_->matchedLength = 0; |
| ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, |
| &status); |
| if (m_search_->matchedIndex == m_search_->textLength) { |
| m_search_->matchedIndex = USEARCH_DONE; |
| } |
| } |
| else { |
| // looking at usearch.cpp, this part is shifted out to |
| // StringSearch instead of SearchIterator because m_strsrch_ is |
| // not accessible in SearchIterator |
| #if 0 |
| if (position + m_strsrch_->pattern.defaultShiftSize |
| > m_search_->textLength) { |
| setMatchNotFound(); |
| return USEARCH_DONE; |
| } |
| #endif |
| if (m_search_->matchedLength <= 0) { |
| // the flipping direction issue has already been handled |
| // in next() |
| // for boundary check purposes. this will ensure that the |
| // next match will not preceed the current offset |
| // note search->matchedIndex will always be set to something |
| // in the code |
| m_search_->matchedIndex = position - 1; |
| } |
| |
| ucol_setOffset(m_strsrch_->textIter, position, &status); |
| |
| #if 0 |
| for (;;) { |
| if (m_search_->isCanonicalMatch) { |
| // can't use exact here since extra accents are allowed. |
| usearch_handleNextCanonical(m_strsrch_, &status); |
| } |
| else { |
| usearch_handleNextExact(m_strsrch_, &status); |
| } |
| if (U_FAILURE(status)) { |
| return USEARCH_DONE; |
| } |
| if (m_breakiterator_ == NULL |
| #if !UCONFIG_NO_BREAK_ITERATION |
| || |
| m_search_->matchedIndex == USEARCH_DONE || |
| (m_breakiterator_->isBoundary(m_search_->matchedIndex) && |
| m_breakiterator_->isBoundary(m_search_->matchedIndex + |
| m_search_->matchedLength)) |
| #endif |
| ) { |
| if (m_search_->matchedIndex == USEARCH_DONE) { |
| ucol_setOffset(m_strsrch_->textIter, |
| m_search_->textLength, &status); |
| } |
| else { |
| ucol_setOffset(m_strsrch_->textIter, |
| m_search_->matchedIndex, &status); |
| } |
| return m_search_->matchedIndex; |
| } |
| } |
| #else |
| // if m_strsrch_->breakIter is always the same as m_breakiterator_ |
| // then we don't need to check the match boundaries here because |
| // usearch_handleNextXXX will already have done it. |
| if (m_search_->isCanonicalMatch) { |
| // *could* actually use exact here 'cause no extra accents allowed... |
| usearch_handleNextCanonical(m_strsrch_, &status); |
| } else { |
| usearch_handleNextExact(m_strsrch_, &status); |
| } |
| |
| if (U_FAILURE(status)) { |
| return USEARCH_DONE; |
| } |
| |
| if (m_search_->matchedIndex == USEARCH_DONE) { |
| ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status); |
| } else { |
| ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status); |
| } |
| |
| return m_search_->matchedIndex; |
| #endif |
| } |
| } |
| return USEARCH_DONE; |
| } |
| |
| int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status) |
| { |
| // values passed here are already in the pre-shift position |
| if (U_SUCCESS(status)) { |
| if (m_strsrch_->pattern.CELength == 0) { |
| m_search_->matchedIndex = |
| (m_search_->matchedIndex == USEARCH_DONE ? getOffset() : |
| m_search_->matchedIndex); |
| if (m_search_->matchedIndex == 0) { |
| setMatchNotFound(); |
| } |
| else { |
| m_search_->matchedIndex --; |
| ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, |
| &status); |
| m_search_->matchedLength = 0; |
| } |
| } |
| else { |
| // looking at usearch.cpp, this part is shifted out to |
| // StringSearch instead of SearchIterator because m_strsrch_ is |
| // not accessible in SearchIterator |
| #if 0 |
| if (!m_search_->isOverlap && |
| position - m_strsrch_->pattern.defaultShiftSize < 0) { |
| setMatchNotFound(); |
| return USEARCH_DONE; |
| } |
| |
| for (;;) { |
| if (m_search_->isCanonicalMatch) { |
| // can't use exact here since extra accents are allowed. |
| usearch_handlePreviousCanonical(m_strsrch_, &status); |
| } |
| else { |
| usearch_handlePreviousExact(m_strsrch_, &status); |
| } |
| if (U_FAILURE(status)) { |
| return USEARCH_DONE; |
| } |
| if (m_breakiterator_ == NULL |
| #if !UCONFIG_NO_BREAK_ITERATION |
| || |
| m_search_->matchedIndex == USEARCH_DONE || |
| (m_breakiterator_->isBoundary(m_search_->matchedIndex) && |
| m_breakiterator_->isBoundary(m_search_->matchedIndex + |
| m_search_->matchedLength)) |
| #endif |
| ) { |
| return m_search_->matchedIndex; |
| } |
| } |
| #else |
| ucol_setOffset(m_strsrch_->textIter, position, &status); |
| |
| if (m_search_->isCanonicalMatch) { |
| // *could* use exact match here since extra accents *not* allowed! |
| usearch_handlePreviousCanonical(m_strsrch_, &status); |
| } else { |
| usearch_handlePreviousExact(m_strsrch_, &status); |
| } |
| |
| if (U_FAILURE(status)) { |
| return USEARCH_DONE; |
| } |
| |
| return m_search_->matchedIndex; |
| #endif |
| } |
| |
| return m_search_->matchedIndex; |
| } |
| return USEARCH_DONE; |
| } |
| |
| U_NAMESPACE_END |
| |
| #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */ |