/* ********************************************************************** * Copyright (c) 2002-2008, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ #ifndef USETITER_H #define USETITER_H #include "unicode/utypes.h" #include "unicode/uobject.h" #include "unicode/unistr.h" /** * \file * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet. */ U_NAMESPACE_BEGIN class UnicodeSet; class UnicodeString; /** * * UnicodeSetIterator iterates over the contents of a UnicodeSet. It * iterates over either code points or code point ranges. After all * code points or ranges have been returned, it returns the * multicharacter strings of the UnicodeSet, if any. * * This class is not intended to be subclassed. Consider any fields * or methods declared as "protected" to be private. The use of * protected in this class is an artifact of history. * * <p>To iterate over code points and strings, use a loop like this: * <pre> * UnicodeSetIterator it(set); * while (it.next()) { * processItem(it.getString()); * } * </pre> * <p>Each item in the set is accessed as a string. Set elements * consisting of single code points are returned as strings containing * just the one code point. * * <p>To iterate over code point ranges, instead of individual code points, * use a loop like this: * <pre> * UnicodeSetIterator it(set); * while (it.nextRange()) { * if (it.isString()) { * processString(it.getString()); * } else { * processCodepointRange(it.getCodepoint(), it.getCodepointEnd()); * } * } * </pre> * @author M. Davis * @stable ICU 2.4 */ class U_COMMON_API UnicodeSetIterator : public UObject { protected: /** * Value of <tt>codepoint</tt> if the iterator points to a string. * If <tt>codepoint == IS_STRING</tt>, then examine * <tt>string</tt> for the current iteration result. * @stable ICU 2.4 */ enum { IS_STRING = -1 }; /** * Current code point, or the special value <tt>IS_STRING</tt>, if * the iterator points to a string. * @stable ICU 2.4 */ UChar32 codepoint; /** * When iterating over ranges using <tt>nextRange()</tt>, * <tt>codepointEnd</tt> contains the inclusive end of the * iteration range, if <tt>codepoint != IS_STRING</tt>. If * iterating over code points using <tt>next()</tt>, or if * <tt>codepoint == IS_STRING</tt>, then the value of * <tt>codepointEnd</tt> is undefined. * @stable ICU 2.4 */ UChar32 codepointEnd; /** * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points * to the current string. If <tt>codepoint != IS_STRING</tt>, the * value of <tt>string</tt> is undefined. * @stable ICU 2.4 */ const UnicodeString* string; public: /** * Create an iterator over the given set. The iterator is valid * only so long as <tt>set</tt> is valid. * @param set set to iterate over * @stable ICU 2.4 */ UnicodeSetIterator(const UnicodeSet& set); /** * Create an iterator over nothing. <tt>next()</tt> and * <tt>nextRange()</tt> return false. This is a convenience * constructor allowing the target to be set later. * @stable ICU 2.4 */ UnicodeSetIterator(); /** * Destructor. * @stable ICU 2.4 */ virtual ~UnicodeSetIterator(); /** * Returns true if the current element is a string. If so, the * caller can retrieve it with <tt>getString()</tt>. If this * method returns false, the current element is a code point or * code point range, depending on whether <tt>next()</tt> or * <tt>nextRange()</tt> was called. * Elements of types string and codepoint can both be retrieved * with the function <tt>getString()</tt>. * Elements of type codepoint can also be retrieved with * <tt>getCodepoint()</tt>. * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint * of the range, and <tt>getCodepointEnd()</tt> returns the end * of the range. * @stable ICU 2.4 */ inline UBool isString() const; /** * Returns the current code point, if <tt>isString()</tt> returned * false. Otherwise returns an undefined result. * @stable ICU 2.4 */ inline UChar32 getCodepoint() const; /** * Returns the end of the current code point range, if * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was * called. Otherwise returns an undefined result. * @stable ICU 2.4 */ inline UChar32 getCodepointEnd() const; /** * Returns the current string, if <tt>isString()</tt> returned * true. If the current iteration item is a code point, a UnicodeString * containing that single code point is returned. * * Ownership of the returned string remains with the iterator. * The string is guaranteed to remain valid only until the iterator is * advanced to the next item, or until the iterator is deleted. * * @stable ICU 2.4 */ const UnicodeString& getString(); /** * Advances the iteration position to the next element in the set, * which can be either a single code point or a string. * If there are no more elements in the set, return false. * * <p> * If <tt>isString() == TRUE</tt>, the value is a * string, otherwise the value is a * single code point. Elements of either type can be retrieved * with the function <tt>getString()</tt>, while elements of * consisting of a single code point can be retrieved with * <tt>getCodepoint()</tt> * * <p>The order of iteration is all code points in sorted order, * followed by all strings sorted order. Do not mix * calls to <tt>next()</tt> and <tt>nextRange()</tt> without * calling <tt>reset()</tt> between them. The results of doing so * are undefined. * * @return true if there was another element in the set. * @stable ICU 2.4 */ UBool next(); /** * Returns the next element in the set, either a code point range * or a string. If there are no more elements in the set, return * false. If <tt>isString() == TRUE</tt>, the value is a * string and can be accessed with <tt>getString()</tt>. Otherwise the value is a * range of one or more code points from <tt>getCodepoint()</tt> to * <tt>getCodepointeEnd()</tt> inclusive. * * <p>The order of iteration is all code points ranges in sorted * order, followed by all strings sorted order. Ranges are * disjoint and non-contiguous. The value returned from <tt>getString()</tt> * is undefined unless <tt>isString() == TRUE</tt>. Do not mix calls to * <tt>next()</tt> and <tt>nextRange()</tt> without calling * <tt>reset()</tt> between them. The results of doing so are * undefined. * * @return true if there was another element in the set. * @stable ICU 2.4 */ UBool nextRange(); /** * Sets this iterator to visit the elements of the given set and * resets it to the start of that set. The iterator is valid only * so long as <tt>set</tt> is valid. * @param set the set to iterate over. * @stable ICU 2.4 */ void reset(const UnicodeSet& set); /** * Resets this iterator to the start of the set. * @stable ICU 2.4 */ void reset(); /** * ICU "poor man's RTTI", returns a UClassID for this class. * * @stable ICU 2.4 */ static UClassID U_EXPORT2 getStaticClassID(); /** * ICU "poor man's RTTI", returns a UClassID for the actual class. * * @stable ICU 2.4 */ virtual UClassID getDynamicClassID() const; // ======================= PRIVATES =========================== protected: // endElement and nextElements are really UChar32's, but we keep // them as signed int32_t's so we can do comparisons with // endElement set to -1. Leave them as int32_t's. /** The set * @stable ICU 2.4 */ const UnicodeSet* set; /** End range * @stable ICU 2.4 */ int32_t endRange; /** Range * @stable ICU 2.4 */ int32_t range; /** End element * @stable ICU 2.4 */ int32_t endElement; /** Next element * @stable ICU 2.4 */ int32_t nextElement; //UBool abbreviated; /** Next string * @stable ICU 2.4 */ int32_t nextString; /** String count * @stable ICU 2.4 */ int32_t stringCount; /** * Points to the string to use when the caller asks for a * string and the current iteration item is a code point, not a string. * @internal */ UnicodeString *cpString; /** Copy constructor. Disallowed. * @stable ICU 2.4 */ UnicodeSetIterator(const UnicodeSetIterator&); // disallow /** Assignment operator. Disallowed. * @stable ICU 2.4 */ UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow /** Load range * @stable ICU 2.4 */ virtual void loadRange(int32_t range); }; inline UBool UnicodeSetIterator::isString() const { return codepoint == (UChar32)IS_STRING; } inline UChar32 UnicodeSetIterator::getCodepoint() const { return codepoint; } inline UChar32 UnicodeSetIterator::getCodepointEnd() const { return codepointEnd; } U_NAMESPACE_END #endif