| /* |
| ******************************************************************************* |
| * Copyright (C) 2010-2011, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ******************************************************************************* |
| * file name: ucharstrieiterator.h |
| * encoding: US-ASCII |
| * tab size: 8 (not used) |
| * indentation:4 |
| * |
| * created on: 2010nov15 |
| * created by: Markus W. Scherer |
| */ |
| |
| #include "unicode/utypes.h" |
| #include "unicode/ucharstrie.h" |
| #include "unicode/unistr.h" |
| #include "uvectr32.h" |
| |
| U_NAMESPACE_BEGIN |
| |
| UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength, |
| UErrorCode &errorCode) |
| : uchars_(trieUChars), |
| pos_(uchars_), initialPos_(uchars_), |
| remainingMatchLength_(-1), initialRemainingMatchLength_(-1), |
| skipValue_(FALSE), |
| maxLength_(maxStringLength), value_(0), stack_(NULL) { |
| if(U_FAILURE(errorCode)) { |
| return; |
| } |
| // stack_ is a pointer so that it's easy to turn ucharstrie.h into |
| // a public API header for which we would want it to depend only on |
| // other public headers. |
| // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway |
| // via the UnicodeString and UVector32 implementations, so this additional |
| // cost is minimal. |
| stack_=new UVector32(errorCode); |
| if(stack_==NULL) { |
| errorCode=U_MEMORY_ALLOCATION_ERROR; |
| } |
| } |
| |
| UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength, |
| UErrorCode &errorCode) |
| : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_), |
| remainingMatchLength_(trie.remainingMatchLength_), |
| initialRemainingMatchLength_(trie.remainingMatchLength_), |
| skipValue_(FALSE), |
| maxLength_(maxStringLength), value_(0), stack_(NULL) { |
| if(U_FAILURE(errorCode)) { |
| return; |
| } |
| stack_=new UVector32(errorCode); |
| if(U_FAILURE(errorCode)) { |
| return; |
| } |
| if(stack_==NULL) { |
| errorCode=U_MEMORY_ALLOCATION_ERROR; |
| return; |
| } |
| int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. |
| if(length>=0) { |
| // Pending linear-match node, append remaining UChars to str_. |
| ++length; |
| if(maxLength_>0 && length>maxLength_) { |
| length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. |
| } |
| str_.append(pos_, length); |
| pos_+=length; |
| remainingMatchLength_-=length; |
| } |
| } |
| |
| UCharsTrie::Iterator::~Iterator() { |
| delete stack_; |
| } |
| |
| UCharsTrie::Iterator & |
| UCharsTrie::Iterator::reset() { |
| pos_=initialPos_; |
| remainingMatchLength_=initialRemainingMatchLength_; |
| skipValue_=FALSE; |
| int32_t length=remainingMatchLength_+1; // Remaining match length. |
| if(maxLength_>0 && length>maxLength_) { |
| length=maxLength_; |
| } |
| str_.truncate(length); |
| pos_+=length; |
| remainingMatchLength_-=length; |
| stack_->setSize(0); |
| return *this; |
| } |
| |
| UBool |
| UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } |
| |
| UBool |
| UCharsTrie::Iterator::next(UErrorCode &errorCode) { |
| if(U_FAILURE(errorCode)) { |
| return FALSE; |
| } |
| const UChar *pos=pos_; |
| if(pos==NULL) { |
| if(stack_->isEmpty()) { |
| return FALSE; |
| } |
| // Pop the state off the stack and continue with the next outbound edge of |
| // the branch node. |
| int32_t stackSize=stack_->size(); |
| int32_t length=stack_->elementAti(stackSize-1); |
| pos=uchars_+stack_->elementAti(stackSize-2); |
| stack_->setSize(stackSize-2); |
| str_.truncate(length&0xffff); |
| length=(int32_t)((uint32_t)length>>16); |
| if(length>1) { |
| pos=branchNext(pos, length, errorCode); |
| if(pos==NULL) { |
| return TRUE; // Reached a final value. |
| } |
| } else { |
| str_.append(*pos++); |
| } |
| } |
| if(remainingMatchLength_>=0) { |
| // We only get here if we started in a pending linear-match node |
| // with more than maxLength remaining units. |
| return truncateAndStop(); |
| } |
| for(;;) { |
| int32_t node=*pos++; |
| if(node>=kMinValueLead) { |
| if(skipValue_) { |
| pos=skipNodeValue(pos, node); |
| node&=kNodeTypeMask; |
| skipValue_=FALSE; |
| } else { |
| // Deliver value for the string so far. |
| UBool isFinal=(UBool)(node>>15); |
| if(isFinal) { |
| value_=readValue(pos, node&0x7fff); |
| } else { |
| value_=readNodeValue(pos, node); |
| } |
| if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) { |
| pos_=NULL; |
| } else { |
| // We cannot skip the value right here because it shares its |
| // lead unit with a match node which we have to evaluate |
| // next time. |
| // Instead, keep pos_ on the node lead unit itself. |
| pos_=pos-1; |
| skipValue_=TRUE; |
| } |
| return TRUE; |
| } |
| } |
| if(maxLength_>0 && str_.length()==maxLength_) { |
| return truncateAndStop(); |
| } |
| if(node<kMinLinearMatch) { |
| if(node==0) { |
| node=*pos++; |
| } |
| pos=branchNext(pos, node+1, errorCode); |
| if(pos==NULL) { |
| return TRUE; // Reached a final value. |
| } |
| } else { |
| // Linear-match node, append length units to str_. |
| int32_t length=node-kMinLinearMatch+1; |
| if(maxLength_>0 && str_.length()+length>maxLength_) { |
| str_.append(pos, maxLength_-str_.length()); |
| return truncateAndStop(); |
| } |
| str_.append(pos, length); |
| pos+=length; |
| } |
| } |
| } |
| |
| // Branch node, needs to take the first outbound edge and push state for the rest. |
| const UChar * |
| UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) { |
| while(length>kMaxBranchLinearSubNodeLength) { |
| ++pos; // ignore the comparison unit |
| // Push state for the greater-or-equal edge. |
| stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode); |
| stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode); |
| // Follow the less-than edge. |
| length>>=1; |
| pos=jumpByDelta(pos); |
| } |
| // List of key-value pairs where values are either final values or jump deltas. |
| // Read the first (key, value) pair. |
| UChar trieUnit=*pos++; |
| int32_t node=*pos++; |
| UBool isFinal=(UBool)(node>>15); |
| int32_t value=readValue(pos, node&=0x7fff); |
| pos=skipValue(pos, node); |
| stack_->addElement((int32_t)(pos-uchars_), errorCode); |
| stack_->addElement(((length-1)<<16)|str_.length(), errorCode); |
| str_.append(trieUnit); |
| if(isFinal) { |
| pos_=NULL; |
| value_=value; |
| return NULL; |
| } else { |
| return pos+value; |
| } |
| } |
| |
| U_NAMESPACE_END |