|  | /* | 
|  | ******************************************************************************* | 
|  | * | 
|  | *   Copyright (C) 2002-2011 International Business Machines | 
|  | *   Corporation and others.  All Rights Reserved. | 
|  | * | 
|  | ******************************************************************************* | 
|  | *   file name:  uiter.h | 
|  | *   encoding:   US-ASCII | 
|  | *   tab size:   8 (not used) | 
|  | *   indentation:4 | 
|  | * | 
|  | *   created on: 2002jan18 | 
|  | *   created by: Markus W. Scherer | 
|  | */ | 
|  |  | 
|  | #ifndef __UITER_H__ | 
|  | #define __UITER_H__ | 
|  |  | 
|  | /** | 
|  | * \file | 
|  | * \brief C API: Unicode Character Iteration | 
|  | * | 
|  | * @see UCharIterator | 
|  | */ | 
|  |  | 
|  | #include "unicode/utypes.h" | 
|  |  | 
|  | #if U_SHOW_CPLUSPLUS_API | 
|  | U_NAMESPACE_BEGIN | 
|  |  | 
|  | class CharacterIterator; | 
|  | class Replaceable; | 
|  |  | 
|  | U_NAMESPACE_END | 
|  | #endif | 
|  |  | 
|  | U_CDECL_BEGIN | 
|  |  | 
|  | struct UCharIterator; | 
|  | typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ | 
|  |  | 
|  | /** | 
|  | * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). | 
|  | * @see UCharIteratorMove | 
|  | * @see UCharIterator | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | typedef enum UCharIteratorOrigin { | 
|  | UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH | 
|  | } UCharIteratorOrigin; | 
|  |  | 
|  | /** Constants for UCharIterator. @stable ICU 2.6 */ | 
|  | enum { | 
|  | /** | 
|  | * Constant value that may be returned by UCharIteratorMove | 
|  | * indicating that the final UTF-16 index is not known, but that the move succeeded. | 
|  | * This can occur when moving relative to limit or length, or | 
|  | * when moving relative to the current index after a setState() | 
|  | * when the current UTF-16 index is not known. | 
|  | * | 
|  | * It would be very inefficient to have to count from the beginning of the text | 
|  | * just to get the current/limit/length index after moving relative to it. | 
|  | * The actual index can be determined with getIndex(UITER_CURRENT) | 
|  | * which will count the UChars if necessary. | 
|  | * | 
|  | * @stable ICU 2.6 | 
|  | */ | 
|  | UITER_UNKNOWN_INDEX=-2 | 
|  | }; | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Constant for UCharIterator getState() indicating an error or | 
|  | * an unknown state. | 
|  | * Returned by uiter_getState()/UCharIteratorGetState | 
|  | * when an error occurs. | 
|  | * Also, some UCharIterator implementations may not be able to return | 
|  | * a valid state for each position. This will be clearly documented | 
|  | * for each such iterator (none of the public ones here). | 
|  | * | 
|  | * @stable ICU 2.6 | 
|  | */ | 
|  | #define UITER_NO_STATE ((uint32_t)0xffffffff) | 
|  |  | 
|  | /** | 
|  | * Function type declaration for UCharIterator.getIndex(). | 
|  | * | 
|  | * Gets the current position, or the start or limit of the | 
|  | * iteration range. | 
|  | * | 
|  | * This function may perform slowly for UITER_CURRENT after setState() was called, | 
|  | * or for UITER_LENGTH, because an iterator implementation may have to count | 
|  | * UChars if the underlying storage is not UTF-16. | 
|  | * | 
|  | * @param iter the UCharIterator structure ("this pointer") | 
|  | * @param origin get the 0, start, limit, length, or current index | 
|  | * @return the requested index, or U_SENTINEL in an error condition | 
|  | * | 
|  | * @see UCharIteratorOrigin | 
|  | * @see UCharIterator | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | typedef int32_t U_CALLCONV | 
|  | UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); | 
|  |  | 
|  | /** | 
|  | * Function type declaration for UCharIterator.move(). | 
|  | * | 
|  | * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). | 
|  | * | 
|  | * Moves the current position relative to the start or limit of the | 
|  | * iteration range, or relative to the current position itself. | 
|  | * The movement is expressed in numbers of code units forward | 
|  | * or backward by specifying a positive or negative delta. | 
|  | * Out of bounds movement will be pinned to the start or limit. | 
|  | * | 
|  | * This function may perform slowly for moving relative to UITER_LENGTH | 
|  | * because an iterator implementation may have to count the rest of the | 
|  | * UChars if the native storage is not UTF-16. | 
|  | * | 
|  | * When moving relative to the limit or length, or | 
|  | * relative to the current position after setState() was called, | 
|  | * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient | 
|  | * determination of the actual UTF-16 index. | 
|  | * The actual index can be determined with getIndex(UITER_CURRENT) | 
|  | * which will count the UChars if necessary. | 
|  | * See UITER_UNKNOWN_INDEX for details. | 
|  | * | 
|  | * @param iter the UCharIterator structure ("this pointer") | 
|  | * @param delta can be positive, zero, or negative | 
|  | * @param origin move relative to the 0, start, limit, length, or current index | 
|  | * @return the new index, or U_SENTINEL on an error condition, | 
|  | *         or UITER_UNKNOWN_INDEX when the index is not known. | 
|  | * | 
|  | * @see UCharIteratorOrigin | 
|  | * @see UCharIterator | 
|  | * @see UITER_UNKNOWN_INDEX | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | typedef int32_t U_CALLCONV | 
|  | UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); | 
|  |  | 
|  | /** | 
|  | * Function type declaration for UCharIterator.hasNext(). | 
|  | * | 
|  | * Check if current() and next() can still | 
|  | * return another code unit. | 
|  | * | 
|  | * @param iter the UCharIterator structure ("this pointer") | 
|  | * @return boolean value for whether current() and next() can still return another code unit | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | typedef UBool U_CALLCONV | 
|  | UCharIteratorHasNext(UCharIterator *iter); | 
|  |  | 
|  | /** | 
|  | * Function type declaration for UCharIterator.hasPrevious(). | 
|  | * | 
|  | * Check if previous() can still return another code unit. | 
|  | * | 
|  | * @param iter the UCharIterator structure ("this pointer") | 
|  | * @return boolean value for whether previous() can still return another code unit | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | typedef UBool U_CALLCONV | 
|  | UCharIteratorHasPrevious(UCharIterator *iter); | 
|  |  | 
|  | /** | 
|  | * Function type declaration for UCharIterator.current(). | 
|  | * | 
|  | * Return the code unit at the current position, | 
|  | * or U_SENTINEL if there is none (index is at the limit). | 
|  | * | 
|  | * @param iter the UCharIterator structure ("this pointer") | 
|  | * @return the current code unit | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | typedef UChar32 U_CALLCONV | 
|  | UCharIteratorCurrent(UCharIterator *iter); | 
|  |  | 
|  | /** | 
|  | * Function type declaration for UCharIterator.next(). | 
|  | * | 
|  | * Return the code unit at the current index and increment | 
|  | * the index (post-increment, like s[i++]), | 
|  | * or return U_SENTINEL if there is none (index is at the limit). | 
|  | * | 
|  | * @param iter the UCharIterator structure ("this pointer") | 
|  | * @return the current code unit (and post-increment the current index) | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | typedef UChar32 U_CALLCONV | 
|  | UCharIteratorNext(UCharIterator *iter); | 
|  |  | 
|  | /** | 
|  | * Function type declaration for UCharIterator.previous(). | 
|  | * | 
|  | * Decrement the index and return the code unit from there | 
|  | * (pre-decrement, like s[--i]), | 
|  | * or return U_SENTINEL if there is none (index is at the start). | 
|  | * | 
|  | * @param iter the UCharIterator structure ("this pointer") | 
|  | * @return the previous code unit (after pre-decrementing the current index) | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | typedef UChar32 U_CALLCONV | 
|  | UCharIteratorPrevious(UCharIterator *iter); | 
|  |  | 
|  | /** | 
|  | * Function type declaration for UCharIterator.reservedFn(). | 
|  | * Reserved for future use. | 
|  | * | 
|  | * @param iter the UCharIterator structure ("this pointer") | 
|  | * @param something some integer argument | 
|  | * @return some integer | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | typedef int32_t U_CALLCONV | 
|  | UCharIteratorReserved(UCharIterator *iter, int32_t something); | 
|  |  | 
|  | /** | 
|  | * Function type declaration for UCharIterator.getState(). | 
|  | * | 
|  | * Get the "state" of the iterator in the form of a single 32-bit word. | 
|  | * It is recommended that the state value be calculated to be as small as | 
|  | * is feasible. For strings with limited lengths, fewer than 32 bits may | 
|  | * be sufficient. | 
|  | * | 
|  | * This is used together with setState()/UCharIteratorSetState | 
|  | * to save and restore the iterator position more efficiently than with | 
|  | * getIndex()/move(). | 
|  | * | 
|  | * The iterator state is defined as a uint32_t value because it is designed | 
|  | * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state | 
|  | * of the character iterator. | 
|  | * | 
|  | * With some UCharIterator implementations (e.g., UTF-8), | 
|  | * getting and setting the UTF-16 index with existing functions | 
|  | * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but | 
|  | * relatively slow because the iterator has to "walk" from a known index | 
|  | * to the requested one. | 
|  | * This takes more time the farther it needs to go. | 
|  | * | 
|  | * An opaque state value allows an iterator implementation to provide | 
|  | * an internal index (UTF-8: the source byte array index) for | 
|  | * fast, constant-time restoration. | 
|  | * | 
|  | * After calling setState(), a getIndex(UITER_CURRENT) may be slow because | 
|  | * the UTF-16 index may not be restored as well, but the iterator can deliver | 
|  | * the correct text contents and move relative to the current position | 
|  | * without performance degradation. | 
|  | * | 
|  | * Some UCharIterator implementations may not be able to return | 
|  | * a valid state for each position, in which case they return UITER_NO_STATE instead. | 
|  | * This will be clearly documented for each such iterator (none of the public ones here). | 
|  | * | 
|  | * @param iter the UCharIterator structure ("this pointer") | 
|  | * @return the state word | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @see UCharIteratorSetState | 
|  | * @see UITER_NO_STATE | 
|  | * @stable ICU 2.6 | 
|  | */ | 
|  | typedef uint32_t U_CALLCONV | 
|  | UCharIteratorGetState(const UCharIterator *iter); | 
|  |  | 
|  | /** | 
|  | * Function type declaration for UCharIterator.setState(). | 
|  | * | 
|  | * Restore the "state" of the iterator using a state word from a getState() call. | 
|  | * The iterator object need not be the same one as for which getState() was called, | 
|  | * but it must be of the same type (set up using the same uiter_setXYZ function) | 
|  | * and it must iterate over the same string | 
|  | * (binary identical regardless of memory address). | 
|  | * For more about the state word see UCharIteratorGetState. | 
|  | * | 
|  | * After calling setState(), a getIndex(UITER_CURRENT) may be slow because | 
|  | * the UTF-16 index may not be restored as well, but the iterator can deliver | 
|  | * the correct text contents and move relative to the current position | 
|  | * without performance degradation. | 
|  | * | 
|  | * @param iter the UCharIterator structure ("this pointer") | 
|  | * @param state the state word from a getState() call | 
|  | *              on a same-type, same-string iterator | 
|  | * @param pErrorCode Must be a valid pointer to an error code value, | 
|  | *                   which must not indicate a failure before the function call. | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @see UCharIteratorGetState | 
|  | * @stable ICU 2.6 | 
|  | */ | 
|  | typedef void U_CALLCONV | 
|  | UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); | 
|  |  | 
|  |  | 
|  | /** | 
|  | * C API for code unit iteration. | 
|  | * This can be used as a C wrapper around | 
|  | * CharacterIterator, Replaceable, or implemented using simple strings, etc. | 
|  | * | 
|  | * There are two roles for using UCharIterator: | 
|  | * | 
|  | * A "provider" sets the necessary function pointers and controls the "protected" | 
|  | * fields of the UCharIterator structure. A "provider" passes a UCharIterator | 
|  | * into C APIs that need a UCharIterator as an abstract, flexible string interface. | 
|  | * | 
|  | * Implementations of such C APIs are "callers" of UCharIterator functions; | 
|  | * they only use the "public" function pointers and never access the "protected" | 
|  | * fields directly. | 
|  | * | 
|  | * The current() and next() functions only check the current index against the | 
|  | * limit, and previous() only checks the current index against the start, | 
|  | * to see if the iterator already reached the end of the iteration range. | 
|  | * | 
|  | * The assumption - in all iterators - is that the index is moved via the API, | 
|  | * which means it won't go out of bounds, or the index is modified by | 
|  | * user code that knows enough about the iterator implementation to set valid | 
|  | * index values. | 
|  | * | 
|  | * UCharIterator functions return code unit values 0..0xffff, | 
|  | * or U_SENTINEL if the iteration bounds are reached. | 
|  | * | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | struct UCharIterator { | 
|  | /** | 
|  | * (protected) Pointer to string or wrapped object or similar. | 
|  | * Not used by caller. | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | const void *context; | 
|  |  | 
|  | /** | 
|  | * (protected) Length of string or similar. | 
|  | * Not used by caller. | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | int32_t length; | 
|  |  | 
|  | /** | 
|  | * (protected) Start index or similar. | 
|  | * Not used by caller. | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | int32_t start; | 
|  |  | 
|  | /** | 
|  | * (protected) Current index or similar. | 
|  | * Not used by caller. | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | int32_t index; | 
|  |  | 
|  | /** | 
|  | * (protected) Limit index or similar. | 
|  | * Not used by caller. | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | int32_t limit; | 
|  |  | 
|  | /** | 
|  | * (protected) Used by UTF-8 iterators and possibly others. | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | int32_t reservedField; | 
|  |  | 
|  | /** | 
|  | * (public) Returns the current position or the | 
|  | * start or limit index of the iteration range. | 
|  | * | 
|  | * @see UCharIteratorGetIndex | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | UCharIteratorGetIndex *getIndex; | 
|  |  | 
|  | /** | 
|  | * (public) Moves the current position relative to the start or limit of the | 
|  | * iteration range, or relative to the current position itself. | 
|  | * The movement is expressed in numbers of code units forward | 
|  | * or backward by specifying a positive or negative delta. | 
|  | * | 
|  | * @see UCharIteratorMove | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | UCharIteratorMove *move; | 
|  |  | 
|  | /** | 
|  | * (public) Check if current() and next() can still | 
|  | * return another code unit. | 
|  | * | 
|  | * @see UCharIteratorHasNext | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | UCharIteratorHasNext *hasNext; | 
|  |  | 
|  | /** | 
|  | * (public) Check if previous() can still return another code unit. | 
|  | * | 
|  | * @see UCharIteratorHasPrevious | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | UCharIteratorHasPrevious *hasPrevious; | 
|  |  | 
|  | /** | 
|  | * (public) Return the code unit at the current position, | 
|  | * or U_SENTINEL if there is none (index is at the limit). | 
|  | * | 
|  | * @see UCharIteratorCurrent | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | UCharIteratorCurrent *current; | 
|  |  | 
|  | /** | 
|  | * (public) Return the code unit at the current index and increment | 
|  | * the index (post-increment, like s[i++]), | 
|  | * or return U_SENTINEL if there is none (index is at the limit). | 
|  | * | 
|  | * @see UCharIteratorNext | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | UCharIteratorNext *next; | 
|  |  | 
|  | /** | 
|  | * (public) Decrement the index and return the code unit from there | 
|  | * (pre-decrement, like s[--i]), | 
|  | * or return U_SENTINEL if there is none (index is at the start). | 
|  | * | 
|  | * @see UCharIteratorPrevious | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | UCharIteratorPrevious *previous; | 
|  |  | 
|  | /** | 
|  | * (public) Reserved for future use. Currently NULL. | 
|  | * | 
|  | * @see UCharIteratorReserved | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | UCharIteratorReserved *reservedFn; | 
|  |  | 
|  | /** | 
|  | * (public) Return the state of the iterator, to be restored later with setState(). | 
|  | * This function pointer is NULL if the iterator does not implement it. | 
|  | * | 
|  | * @see UCharIteratorGet | 
|  | * @stable ICU 2.6 | 
|  | */ | 
|  | UCharIteratorGetState *getState; | 
|  |  | 
|  | /** | 
|  | * (public) Restore the iterator state from the state word from a call | 
|  | * to getState(). | 
|  | * This function pointer is NULL if the iterator does not implement it. | 
|  | * | 
|  | * @see UCharIteratorSet | 
|  | * @stable ICU 2.6 | 
|  | */ | 
|  | UCharIteratorSetState *setState; | 
|  | }; | 
|  |  | 
|  | /** | 
|  | * Helper function for UCharIterator to get the code point | 
|  | * at the current index. | 
|  | * | 
|  | * Return the code point that includes the code unit at the current position, | 
|  | * or U_SENTINEL if there is none (index is at the limit). | 
|  | * If the current code unit is a lead or trail surrogate, | 
|  | * then the following or preceding surrogate is used to form | 
|  | * the code point value. | 
|  | * | 
|  | * @param iter the UCharIterator structure ("this pointer") | 
|  | * @return the current code point | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @see U16_GET | 
|  | * @see UnicodeString::char32At() | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | U_STABLE UChar32 U_EXPORT2 | 
|  | uiter_current32(UCharIterator *iter); | 
|  |  | 
|  | /** | 
|  | * Helper function for UCharIterator to get the next code point. | 
|  | * | 
|  | * Return the code point at the current index and increment | 
|  | * the index (post-increment, like s[i++]), | 
|  | * or return U_SENTINEL if there is none (index is at the limit). | 
|  | * | 
|  | * @param iter the UCharIterator structure ("this pointer") | 
|  | * @return the current code point (and post-increment the current index) | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @see U16_NEXT | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | U_STABLE UChar32 U_EXPORT2 | 
|  | uiter_next32(UCharIterator *iter); | 
|  |  | 
|  | /** | 
|  | * Helper function for UCharIterator to get the previous code point. | 
|  | * | 
|  | * Decrement the index and return the code point from there | 
|  | * (pre-decrement, like s[--i]), | 
|  | * or return U_SENTINEL if there is none (index is at the start). | 
|  | * | 
|  | * @param iter the UCharIterator structure ("this pointer") | 
|  | * @return the previous code point (after pre-decrementing the current index) | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @see U16_PREV | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | U_STABLE UChar32 U_EXPORT2 | 
|  | uiter_previous32(UCharIterator *iter); | 
|  |  | 
|  | /** | 
|  | * Get the "state" of the iterator in the form of a single 32-bit word. | 
|  | * This is a convenience function that calls iter->getState(iter) | 
|  | * if iter->getState is not NULL; | 
|  | * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. | 
|  | * | 
|  | * Some UCharIterator implementations may not be able to return | 
|  | * a valid state for each position, in which case they return UITER_NO_STATE instead. | 
|  | * This will be clearly documented for each such iterator (none of the public ones here). | 
|  | * | 
|  | * @param iter the UCharIterator structure ("this pointer") | 
|  | * @return the state word | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @see UCharIteratorGetState | 
|  | * @see UITER_NO_STATE | 
|  | * @stable ICU 2.6 | 
|  | */ | 
|  | U_STABLE uint32_t U_EXPORT2 | 
|  | uiter_getState(const UCharIterator *iter); | 
|  |  | 
|  | /** | 
|  | * Restore the "state" of the iterator using a state word from a getState() call. | 
|  | * This is a convenience function that calls iter->setState(iter, state, pErrorCode) | 
|  | * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. | 
|  | * | 
|  | * @param iter the UCharIterator structure ("this pointer") | 
|  | * @param state the state word from a getState() call | 
|  | *              on a same-type, same-string iterator | 
|  | * @param pErrorCode Must be a valid pointer to an error code value, | 
|  | *                   which must not indicate a failure before the function call. | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @see UCharIteratorSetState | 
|  | * @stable ICU 2.6 | 
|  | */ | 
|  | U_STABLE void U_EXPORT2 | 
|  | uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); | 
|  |  | 
|  | /** | 
|  | * Set up a UCharIterator to iterate over a string. | 
|  | * | 
|  | * Sets the UCharIterator function pointers for iteration over the string s | 
|  | * with iteration boundaries start=index=0 and length=limit=string length. | 
|  | * The "provider" may set the start, index, and limit values at any time | 
|  | * within the range 0..length. | 
|  | * The length field will be ignored. | 
|  | * | 
|  | * The string pointer s is set into UCharIterator.context without copying | 
|  | * or reallocating the string contents. | 
|  | * | 
|  | * getState() simply returns the current index. | 
|  | * move() will always return the final index. | 
|  | * | 
|  | * @param iter UCharIterator structure to be set for iteration | 
|  | * @param s String to iterate over | 
|  | * @param length Length of s, or -1 if NUL-terminated | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | U_STABLE void U_EXPORT2 | 
|  | uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); | 
|  |  | 
|  | /** | 
|  | * Set up a UCharIterator to iterate over a UTF-16BE string | 
|  | * (byte vector with a big-endian pair of bytes per UChar). | 
|  | * | 
|  | * Everything works just like with a normal UChar iterator (uiter_setString), | 
|  | * except that UChars are assembled from byte pairs, | 
|  | * and that the length argument here indicates an even number of bytes. | 
|  | * | 
|  | * getState() simply returns the current index. | 
|  | * move() will always return the final index. | 
|  | * | 
|  | * @param iter UCharIterator structure to be set for iteration | 
|  | * @param s UTF-16BE string to iterate over | 
|  | * @param length Length of s as an even number of bytes, or -1 if NUL-terminated | 
|  | *               (NUL means pair of 0 bytes at even index from s) | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @see uiter_setString | 
|  | * @stable ICU 2.6 | 
|  | */ | 
|  | U_STABLE void U_EXPORT2 | 
|  | uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); | 
|  |  | 
|  | /** | 
|  | * Set up a UCharIterator to iterate over a UTF-8 string. | 
|  | * | 
|  | * Sets the UCharIterator function pointers for iteration over the UTF-8 string s | 
|  | * with UTF-8 iteration boundaries 0 and length. | 
|  | * The implementation counts the UTF-16 index on the fly and | 
|  | * lazily evaluates the UTF-16 length of the text. | 
|  | * | 
|  | * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. | 
|  | * When the reservedField is not 0, then it contains a supplementary code point | 
|  | * and the UTF-16 index is between the two corresponding surrogates. | 
|  | * At that point, the UTF-8 index is behind that code point. | 
|  | * | 
|  | * The UTF-8 string pointer s is set into UCharIterator.context without copying | 
|  | * or reallocating the string contents. | 
|  | * | 
|  | * getState() returns a state value consisting of | 
|  | * - the current UTF-8 source byte index (bits 31..1) | 
|  | * - a flag (bit 0) that indicates whether the UChar position is in the middle | 
|  | *   of a surrogate pair | 
|  | *   (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) | 
|  | * | 
|  | * getState() cannot also encode the UTF-16 index in the state value. | 
|  | * move(relative to limit or length), or | 
|  | * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. | 
|  | * | 
|  | * @param iter UCharIterator structure to be set for iteration | 
|  | * @param s UTF-8 string to iterate over | 
|  | * @param length Length of s in bytes, or -1 if NUL-terminated | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @stable ICU 2.6 | 
|  | */ | 
|  | U_STABLE void U_EXPORT2 | 
|  | uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); | 
|  |  | 
|  | #if U_SHOW_CPLUSPLUS_API | 
|  |  | 
|  | /** | 
|  | * Set up a UCharIterator to wrap around a C++ CharacterIterator. | 
|  | * | 
|  | * Sets the UCharIterator function pointers for iteration using the | 
|  | * CharacterIterator charIter. | 
|  | * | 
|  | * The CharacterIterator pointer charIter is set into UCharIterator.context | 
|  | * without copying or cloning the CharacterIterator object. | 
|  | * The other "protected" UCharIterator fields are set to 0 and will be ignored. | 
|  | * The iteration index and boundaries are controlled by the CharacterIterator. | 
|  | * | 
|  | * getState() simply returns the current index. | 
|  | * move() will always return the final index. | 
|  | * | 
|  | * @param iter UCharIterator structure to be set for iteration | 
|  | * @param charIter CharacterIterator to wrap | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | U_STABLE void U_EXPORT2 | 
|  | uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter); | 
|  |  | 
|  | /** | 
|  | * Set up a UCharIterator to iterate over a C++ Replaceable. | 
|  | * | 
|  | * Sets the UCharIterator function pointers for iteration over the | 
|  | * Replaceable rep with iteration boundaries start=index=0 and | 
|  | * length=limit=rep->length(). | 
|  | * The "provider" may set the start, index, and limit values at any time | 
|  | * within the range 0..length=rep->length(). | 
|  | * The length field will be ignored. | 
|  | * | 
|  | * The Replaceable pointer rep is set into UCharIterator.context without copying | 
|  | * or cloning/reallocating the Replaceable object. | 
|  | * | 
|  | * getState() simply returns the current index. | 
|  | * move() will always return the final index. | 
|  | * | 
|  | * @param iter UCharIterator structure to be set for iteration | 
|  | * @param rep Replaceable to iterate over | 
|  | * | 
|  | * @see UCharIterator | 
|  | * @stable ICU 2.1 | 
|  | */ | 
|  | U_STABLE void U_EXPORT2 | 
|  | uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep); | 
|  |  | 
|  | #endif | 
|  |  | 
|  | U_CDECL_END | 
|  |  | 
|  | #endif |