/* ******************************************************************************* * Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: ucharstrieiterator.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 2010nov15 * created by: Markus W. Scherer */ #include "unicode/utypes.h" #include "unicode/ucharstrie.h" #include "unicode/unistr.h" #include "uvectr32.h" U_NAMESPACE_BEGIN UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode) : uchars_(trieUChars), pos_(uchars_), initialPos_(uchars_), remainingMatchLength_(-1), initialRemainingMatchLength_(-1), skipValue_(FALSE), maxLength_(maxStringLength), value_(0), stack_(NULL) { if(U_FAILURE(errorCode)) { return; } // stack_ is a pointer so that it's easy to turn ucharstrie.h into // a public API header for which we would want it to depend only on // other public headers. // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway // via the UnicodeString and UVector32 implementations, so this additional // cost is minimal. stack_=new UVector32(errorCode); if(stack_==NULL) { errorCode=U_MEMORY_ALLOCATION_ERROR; } } UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode) : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_), remainingMatchLength_(trie.remainingMatchLength_), initialRemainingMatchLength_(trie.remainingMatchLength_), skipValue_(FALSE), maxLength_(maxStringLength), value_(0), stack_(NULL) { if(U_FAILURE(errorCode)) { return; } stack_=new UVector32(errorCode); if(U_FAILURE(errorCode)) { return; } if(stack_==NULL) { errorCode=U_MEMORY_ALLOCATION_ERROR; return; } int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. if(length>=0) { // Pending linear-match node, append remaining UChars to str_. ++length; if(maxLength_>0 && length>maxLength_) { length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. } str_.append(pos_, length); pos_+=length; remainingMatchLength_-=length; } } UCharsTrie::Iterator::~Iterator() { delete stack_; } UCharsTrie::Iterator & UCharsTrie::Iterator::reset() { pos_=initialPos_; remainingMatchLength_=initialRemainingMatchLength_; skipValue_=FALSE; int32_t length=remainingMatchLength_+1; // Remaining match length. if(maxLength_>0 && length>maxLength_) { length=maxLength_; } str_.truncate(length); pos_+=length; remainingMatchLength_-=length; stack_->setSize(0); return *this; } UBool UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } UBool UCharsTrie::Iterator::next(UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return FALSE; } const UChar *pos=pos_; if(pos==NULL) { if(stack_->isEmpty()) { return FALSE; } // Pop the state off the stack and continue with the next outbound edge of // the branch node. int32_t stackSize=stack_->size(); int32_t length=stack_->elementAti(stackSize-1); pos=uchars_+stack_->elementAti(stackSize-2); stack_->setSize(stackSize-2); str_.truncate(length&0xffff); length=(int32_t)((uint32_t)length>>16); if(length>1) { pos=branchNext(pos, length, errorCode); if(pos==NULL) { return TRUE; // Reached a final value. } } else { str_.append(*pos++); } } if(remainingMatchLength_>=0) { // We only get here if we started in a pending linear-match node // with more than maxLength remaining units. return truncateAndStop(); } for(;;) { int32_t node=*pos++; if(node>=kMinValueLead) { if(skipValue_) { pos=skipNodeValue(pos, node); node&=kNodeTypeMask; skipValue_=FALSE; } else { // Deliver value for the string so far. UBool isFinal=(UBool)(node>>15); if(isFinal) { value_=readValue(pos, node&0x7fff); } else { value_=readNodeValue(pos, node); } if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) { pos_=NULL; } else { // We cannot skip the value right here because it shares its // lead unit with a match node which we have to evaluate // next time. // Instead, keep pos_ on the node lead unit itself. pos_=pos-1; skipValue_=TRUE; } return TRUE; } } if(maxLength_>0 && str_.length()==maxLength_) { return truncateAndStop(); } if(node<kMinLinearMatch) { if(node==0) { node=*pos++; } pos=branchNext(pos, node+1, errorCode); if(pos==NULL) { return TRUE; // Reached a final value. } } else { // Linear-match node, append length units to str_. int32_t length=node-kMinLinearMatch+1; if(maxLength_>0 && str_.length()+length>maxLength_) { str_.append(pos, maxLength_-str_.length()); return truncateAndStop(); } str_.append(pos, length); pos+=length; } } } // Branch node, needs to take the first outbound edge and push state for the rest. const UChar * UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) { while(length>kMaxBranchLinearSubNodeLength) { ++pos; // ignore the comparison unit // Push state for the greater-or-equal edge. stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode); stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode); // Follow the less-than edge. length>>=1; pos=jumpByDelta(pos); } // List of key-value pairs where values are either final values or jump deltas. // Read the first (key, value) pair. UChar trieUnit=*pos++; int32_t node=*pos++; UBool isFinal=(UBool)(node>>15); int32_t value=readValue(pos, node&=0x7fff); pos=skipValue(pos, node); stack_->addElement((int32_t)(pos-uchars_), errorCode); stack_->addElement(((length-1)<<16)|str_.length(), errorCode); str_.append(trieUnit); if(isFinal) { pos_=NULL; value_=value; return NULL; } else { return pos+value; } } U_NAMESPACE_END