| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /* |
| ********************************************************************** |
| * Copyright (C) 2001-2008,2010 IBM and others. All rights reserved. |
| ********************************************************************** |
| * Date Name Description |
| * 03/22/2000 helena Creation. |
| ********************************************************************** |
| */ |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION |
| |
| #if defined(STARBOARD) |
| #include "starboard/client_porting/poem/string_poem.h" |
| #endif // defined(STARBOARD) |
| #include "unicode/brkiter.h" |
| #include "unicode/schriter.h" |
| #include "unicode/search.h" |
| #include "usrchimp.h" |
| #include "cmemory.h" |
| |
| // public constructors and destructors ----------------------------------- |
| U_NAMESPACE_BEGIN |
| |
| SearchIterator::SearchIterator(const SearchIterator &other) |
| : UObject(other) |
| { |
| m_breakiterator_ = other.m_breakiterator_; |
| m_text_ = other.m_text_; |
| m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); |
| m_search_->breakIter = other.m_search_->breakIter; |
| m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch; |
| m_search_->isOverlap = other.m_search_->isOverlap; |
| m_search_->elementComparisonType = other.m_search_->elementComparisonType; |
| m_search_->matchedIndex = other.m_search_->matchedIndex; |
| m_search_->matchedLength = other.m_search_->matchedLength; |
| m_search_->text = other.m_search_->text; |
| m_search_->textLength = other.m_search_->textLength; |
| } |
| |
| SearchIterator::~SearchIterator() |
| { |
| if (m_search_ != NULL) { |
| uprv_free(m_search_); |
| } |
| } |
| |
| // public get and set methods ---------------------------------------- |
| |
| void SearchIterator::setAttribute(USearchAttribute attribute, |
| USearchAttributeValue value, |
| UErrorCode &status) |
| { |
| if (U_SUCCESS(status)) { |
| switch (attribute) |
| { |
| case USEARCH_OVERLAP : |
| m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE); |
| break; |
| case USEARCH_CANONICAL_MATCH : |
| m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE); |
| break; |
| case USEARCH_ELEMENT_COMPARISON : |
| if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) { |
| m_search_->elementComparisonType = (int16_t)value; |
| } else { |
| m_search_->elementComparisonType = 0; |
| } |
| break; |
| default: |
| status = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| } |
| if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) { |
| status = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| } |
| |
| USearchAttributeValue SearchIterator::getAttribute( |
| USearchAttribute attribute) const |
| { |
| switch (attribute) { |
| case USEARCH_OVERLAP : |
| return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF); |
| case USEARCH_CANONICAL_MATCH : |
| return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON : |
| USEARCH_OFF); |
| case USEARCH_ELEMENT_COMPARISON : |
| { |
| int16_t value = m_search_->elementComparisonType; |
| if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) { |
| return (USearchAttributeValue)value; |
| } else { |
| return USEARCH_STANDARD_ELEMENT_COMPARISON; |
| } |
| } |
| default : |
| return USEARCH_DEFAULT; |
| } |
| } |
| |
| int32_t SearchIterator::getMatchedStart() const |
| { |
| return m_search_->matchedIndex; |
| } |
| |
| int32_t SearchIterator::getMatchedLength() const |
| { |
| return m_search_->matchedLength; |
| } |
| |
| void SearchIterator::getMatchedText(UnicodeString &result) const |
| { |
| int32_t matchedindex = m_search_->matchedIndex; |
| int32_t matchedlength = m_search_->matchedLength; |
| if (matchedindex != USEARCH_DONE && matchedlength != 0) { |
| result.setTo(m_search_->text + matchedindex, matchedlength); |
| } |
| else { |
| result.remove(); |
| } |
| } |
| |
| void SearchIterator::setBreakIterator(BreakIterator *breakiter, |
| UErrorCode &status) |
| { |
| if (U_SUCCESS(status)) { |
| #if 0 |
| m_search_->breakIter = NULL; |
| // the c++ breakiterator may not make use of ubreakiterator. |
| // so we'll have to keep track of it ourselves. |
| #else |
| // Well, gee... the Constructors that take a BreakIterator |
| // all cast the BreakIterator to a UBreakIterator and |
| // pass it to the corresponding usearch_openFromXXX |
| // routine, so there's no reason not to do this. |
| // |
| // Besides, a UBreakIterator is a BreakIterator, so |
| // any subclass of BreakIterator should work fine here... |
| m_search_->breakIter = (UBreakIterator *) breakiter; |
| #endif |
| |
| m_breakiterator_ = breakiter; |
| } |
| } |
| |
| const BreakIterator * SearchIterator::getBreakIterator(void) const |
| { |
| return m_breakiterator_; |
| } |
| |
| void SearchIterator::setText(const UnicodeString &text, UErrorCode &status) |
| { |
| if (U_SUCCESS(status)) { |
| if (text.length() == 0) { |
| status = U_ILLEGAL_ARGUMENT_ERROR; |
| } |
| else { |
| m_text_ = text; |
| m_search_->text = m_text_.getBuffer(); |
| m_search_->textLength = m_text_.length(); |
| } |
| } |
| } |
| |
| void SearchIterator::setText(CharacterIterator &text, UErrorCode &status) |
| { |
| if (U_SUCCESS(status)) { |
| text.getText(m_text_); |
| setText(m_text_, status); |
| } |
| } |
| |
| const UnicodeString & SearchIterator::getText(void) const |
| { |
| return m_text_; |
| } |
| |
| // operator overloading ---------------------------------------------- |
| |
| UBool SearchIterator::operator==(const SearchIterator &that) const |
| { |
| if (this == &that) { |
| return TRUE; |
| } |
| return (m_breakiterator_ == that.m_breakiterator_ && |
| m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch && |
| m_search_->isOverlap == that.m_search_->isOverlap && |
| m_search_->elementComparisonType == that.m_search_->elementComparisonType && |
| m_search_->matchedIndex == that.m_search_->matchedIndex && |
| m_search_->matchedLength == that.m_search_->matchedLength && |
| m_search_->textLength == that.m_search_->textLength && |
| getOffset() == that.getOffset() && |
| (uprv_memcmp(m_search_->text, that.m_search_->text, |
| m_search_->textLength * sizeof(UChar)) == 0)); |
| } |
| |
| // public methods ---------------------------------------------------- |
| |
| int32_t SearchIterator::first(UErrorCode &status) |
| { |
| if (U_FAILURE(status)) { |
| return USEARCH_DONE; |
| } |
| setOffset(0, status); |
| return handleNext(0, status); |
| } |
| |
| int32_t SearchIterator::following(int32_t position, |
| UErrorCode &status) |
| { |
| if (U_FAILURE(status)) { |
| return USEARCH_DONE; |
| } |
| setOffset(position, status); |
| return handleNext(position, status); |
| } |
| |
| int32_t SearchIterator::last(UErrorCode &status) |
| { |
| if (U_FAILURE(status)) { |
| return USEARCH_DONE; |
| } |
| setOffset(m_search_->textLength, status); |
| return handlePrev(m_search_->textLength, status); |
| } |
| |
| int32_t SearchIterator::preceding(int32_t position, |
| UErrorCode &status) |
| { |
| if (U_FAILURE(status)) { |
| return USEARCH_DONE; |
| } |
| setOffset(position, status); |
| return handlePrev(position, status); |
| } |
| |
| int32_t SearchIterator::next(UErrorCode &status) |
| { |
| if (U_SUCCESS(status)) { |
| int32_t offset = getOffset(); |
| int32_t matchindex = m_search_->matchedIndex; |
| int32_t matchlength = m_search_->matchedLength; |
| m_search_->reset = FALSE; |
| if (m_search_->isForwardSearching == TRUE) { |
| int32_t textlength = m_search_->textLength; |
| if (offset == textlength || matchindex == textlength || |
| (matchindex != USEARCH_DONE && |
| matchindex + matchlength >= textlength)) { |
| // not enough characters to match |
| setMatchNotFound(); |
| return USEARCH_DONE; |
| } |
| } |
| else { |
| // switching direction. |
| // if matchedIndex == USEARCH_DONE, it means that either a |
| // setOffset has been called or that previous ran off the text |
| // string. the iterator would have been set to offset 0 if a |
| // match is not found. |
| m_search_->isForwardSearching = TRUE; |
| if (m_search_->matchedIndex != USEARCH_DONE) { |
| // there's no need to set the collation element iterator |
| // the next call to next will set the offset. |
| return matchindex; |
| } |
| } |
| |
| if (matchlength > 0) { |
| // if matchlength is 0 we are at the start of the iteration |
| if (m_search_->isOverlap) { |
| offset ++; |
| } |
| else { |
| offset += matchlength; |
| } |
| } |
| return handleNext(offset, status); |
| } |
| return USEARCH_DONE; |
| } |
| |
| int32_t SearchIterator::previous(UErrorCode &status) |
| { |
| if (U_SUCCESS(status)) { |
| int32_t offset; |
| if (m_search_->reset) { |
| offset = m_search_->textLength; |
| m_search_->isForwardSearching = FALSE; |
| m_search_->reset = FALSE; |
| setOffset(offset, status); |
| } |
| else { |
| offset = getOffset(); |
| } |
| |
| int32_t matchindex = m_search_->matchedIndex; |
| if (m_search_->isForwardSearching == TRUE) { |
| // switching direction. |
| // if matchedIndex == USEARCH_DONE, it means that either a |
| // setOffset has been called or that next ran off the text |
| // string. the iterator would have been set to offset textLength if |
| // a match is not found. |
| m_search_->isForwardSearching = FALSE; |
| if (matchindex != USEARCH_DONE) { |
| return matchindex; |
| } |
| } |
| else { |
| if (offset == 0 || matchindex == 0) { |
| // not enough characters to match |
| setMatchNotFound(); |
| return USEARCH_DONE; |
| } |
| } |
| |
| if (matchindex != USEARCH_DONE) { |
| if (m_search_->isOverlap) { |
| matchindex += m_search_->matchedLength - 2; |
| } |
| |
| return handlePrev(matchindex, status); |
| } |
| |
| return handlePrev(offset, status); |
| } |
| |
| return USEARCH_DONE; |
| } |
| |
| void SearchIterator::reset() |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| setMatchNotFound(); |
| setOffset(0, status); |
| m_search_->isOverlap = FALSE; |
| m_search_->isCanonicalMatch = FALSE; |
| m_search_->elementComparisonType = 0; |
| m_search_->isForwardSearching = TRUE; |
| m_search_->reset = TRUE; |
| } |
| |
| // protected constructors and destructors ----------------------------- |
| |
| SearchIterator::SearchIterator() |
| { |
| m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); |
| m_search_->breakIter = NULL; |
| m_search_->isOverlap = FALSE; |
| m_search_->isCanonicalMatch = FALSE; |
| m_search_->elementComparisonType = 0; |
| m_search_->isForwardSearching = TRUE; |
| m_search_->reset = TRUE; |
| m_search_->matchedIndex = USEARCH_DONE; |
| m_search_->matchedLength = 0; |
| m_search_->text = NULL; |
| m_search_->textLength = 0; |
| m_breakiterator_ = NULL; |
| } |
| |
| SearchIterator::SearchIterator(const UnicodeString &text, |
| BreakIterator *breakiter) : |
| m_breakiterator_(breakiter), |
| m_text_(text) |
| { |
| m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); |
| m_search_->breakIter = NULL; |
| m_search_->isOverlap = FALSE; |
| m_search_->isCanonicalMatch = FALSE; |
| m_search_->elementComparisonType = 0; |
| m_search_->isForwardSearching = TRUE; |
| m_search_->reset = TRUE; |
| m_search_->matchedIndex = USEARCH_DONE; |
| m_search_->matchedLength = 0; |
| m_search_->text = m_text_.getBuffer(); |
| m_search_->textLength = text.length(); |
| } |
| |
| SearchIterator::SearchIterator(CharacterIterator &text, |
| BreakIterator *breakiter) : |
| m_breakiterator_(breakiter) |
| { |
| m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); |
| m_search_->breakIter = NULL; |
| m_search_->isOverlap = FALSE; |
| m_search_->isCanonicalMatch = FALSE; |
| m_search_->elementComparisonType = 0; |
| m_search_->isForwardSearching = TRUE; |
| m_search_->reset = TRUE; |
| m_search_->matchedIndex = USEARCH_DONE; |
| m_search_->matchedLength = 0; |
| text.getText(m_text_); |
| m_search_->text = m_text_.getBuffer(); |
| m_search_->textLength = m_text_.length(); |
| m_breakiterator_ = breakiter; |
| } |
| |
| // protected methods ------------------------------------------------------ |
| |
| SearchIterator & SearchIterator::operator=(const SearchIterator &that) |
| { |
| if (this != &that) { |
| m_breakiterator_ = that.m_breakiterator_; |
| m_text_ = that.m_text_; |
| m_search_->breakIter = that.m_search_->breakIter; |
| m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch; |
| m_search_->isOverlap = that.m_search_->isOverlap; |
| m_search_->elementComparisonType = that.m_search_->elementComparisonType; |
| m_search_->matchedIndex = that.m_search_->matchedIndex; |
| m_search_->matchedLength = that.m_search_->matchedLength; |
| m_search_->text = that.m_search_->text; |
| m_search_->textLength = that.m_search_->textLength; |
| } |
| return *this; |
| } |
| |
| void SearchIterator::setMatchLength(int32_t length) |
| { |
| m_search_->matchedLength = length; |
| } |
| |
| void SearchIterator::setMatchStart(int32_t position) |
| { |
| m_search_->matchedIndex = position; |
| } |
| |
| void SearchIterator::setMatchNotFound() |
| { |
| setMatchStart(USEARCH_DONE); |
| setMatchLength(0); |
| UErrorCode status = U_ZERO_ERROR; |
| // by default no errors should be returned here since offsets are within |
| // range. |
| if (m_search_->isForwardSearching) { |
| setOffset(m_search_->textLength, status); |
| } |
| else { |
| setOffset(0, status); |
| } |
| } |
| |
| |
| U_NAMESPACE_END |
| |
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */