| /* |
| ******************************************************************************* |
| * |
| * Copyright (C) 2002-2010, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| ******************************************************************************* |
| * file name: propsvec.h |
| * encoding: US-ASCII |
| * tab size: 8 (not used) |
| * indentation:4 |
| * |
| * created on: 2002feb22 |
| * created by: Markus W. Scherer |
| * |
| * Store bits (Unicode character properties) in bit set vectors. |
| */ |
| |
| #ifndef __UPROPSVEC_H__ |
| #define __UPROPSVEC_H__ |
| |
| #include "unicode/utypes.h" |
| #include "utrie.h" |
| #include "utrie2.h" |
| |
| U_CDECL_BEGIN |
| |
| /** |
| * Unicode Properties Vectors associated with code point ranges. |
| * |
| * Rows of uint32_t integers in a contiguous array store |
| * the range limits and the properties vectors. |
| * |
| * Logically, each row has a certain number of uint32_t values, |
| * which is set via the upvec_open() "columns" parameter. |
| * |
| * Internally, two additional columns are stored. |
| * In each internal row, |
| * row[0] contains the start code point and |
| * row[1] contains the limit code point, |
| * which is the start of the next range. |
| * |
| * Initially, there is only one "normal" row for |
| * the half-open range [0..0x110000[ (the limit is excluded) with values 0. |
| * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP. |
| * |
| * It would be possible to store only one range boundary per row, |
| * but self-contained rows make it possible to sort them by contents later. |
| * |
| * The struct is only forward-declared in this header; its layout is not |
| * visible here, so callers work with UPropsVectors pointers and the |
| * upvec_ functions declared below. |
| */ |
| struct UPropsVectors; |
| typedef struct UPropsVectors UPropsVectors; |
| |
| /* |
| * Special pseudo code points for storing the initialValue and the errorValue, |
| * which are used to initialize a UTrie2 or similar. |
| * |
| * All of them lie at or above 0x110000, the limit of the "normal" range, |
| * so they cannot collide with the code points of the normal rows. |
| * |
| * UPVEC_FIRST_SPECIAL_CP - lowest special pseudo code point |
| *                          (same value as UPVEC_INITIAL_VALUE_CP). |
| * UPVEC_INITIAL_VALUE_CP - pseudo code point for the initialValue. |
| * UPVEC_ERROR_VALUE_CP   - pseudo code point for the errorValue. |
| * UPVEC_MAX_CP           - highest pseudo code point currently defined |
| *                          (same value as UPVEC_ERROR_VALUE_CP). |
| */ |
| #define UPVEC_FIRST_SPECIAL_CP 0x110000 |
| #define UPVEC_INITIAL_VALUE_CP 0x110000 |
| #define UPVEC_ERROR_VALUE_CP 0x110001 |
| #define UPVEC_MAX_CP 0x110001 |
| |
| /* |
| * Special pseudo code point used in upvec_compact() signalling the end of |
| * delivering special values and the beginning of delivering real ones. |
| * It is passed to the compaction handler as both start and end |
| * (see the description of UPVecCompactHandler). |
| * Stable value, unlike UPVEC_MAX_CP which might grow over time. |
| */ |
| #define UPVEC_START_REAL_VALUES_CP 0x200000 |
| |
| /* |
| * Open a UPropsVectors object. |
| * @param columns Number of value integers (uint32_t) per row. |
| *                This is the logical column count; the two internal |
| *                range columns are stored in addition to it. |
| * @param pErrorCode ICU error code pointer. |
| *                NOTE(review): presumably follows the usual ICU in/out |
| *                convention (no-op on incoming failure, set on error) - |
| *                confirm in the implementation. |
| * @return The new object. NOTE(review): presumably NULL on failure - confirm. |
| * |
| * upvec_close() is the counterpart declared right below. |
| * NOTE(review): presumably it releases an object returned by upvec_open(); |
| * whether it accepts NULL is not visible in this header - confirm. |
| */ |
| U_CAPI UPropsVectors * U_EXPORT2 |
| upvec_open(int32_t columns, UErrorCode *pErrorCode); |
| |
| U_CAPI void U_EXPORT2 |
| upvec_close(UPropsVectors *pv); |
| |
| /* |
| * In rows for code points [start..end], select the column, |
| * reset the mask bits and set the value bits (ANDed with the mask). |
| * In other words, for the selected column of each affected row: |
| * new = (old AND NOT mask) OR (value AND mask). |
| * |
| * Will set U_NO_WRITE_PERMISSION if called after upvec_compact(). |
| * |
| * @param pv The properties vectors object to modify. |
| * @param start First code point of the range. |
| * @param end Last code point of the range; the [start..end] notation |
| *            indicates that it is included. |
| * @param column Which of the value columns to modify. |
| *            NOTE(review): presumably 0-based and counted without the two |
| *            internal range columns - confirm in the implementation. |
| * @param value Bits to set; only the bits that are also in mask are used. |
| * @param mask Bits of the column that this call replaces. |
| * @param pErrorCode ICU error code pointer; receives U_NO_WRITE_PERMISSION |
| *            in the case described above. Other error conditions are not |
| *            visible in this header. |
| */ |
| U_CAPI void U_EXPORT2 |
| upvec_setValue(UPropsVectors *pv, |
| UChar32 start, UChar32 end, |
| int32_t column, |
| uint32_t value, uint32_t mask, |
| UErrorCode *pErrorCode); |
| |
| /* |
| * Get the value stored for code point c in the given column. |
| * |
| * Logically const but must not be used on the same pv concurrently! |
| * NOTE(review): the reason is not visible in this header; presumably the |
| * implementation updates internal lookup state - confirm. |
| * Always returns 0 if called after upvec_compact(). |
| * |
| * @param pv The properties vectors object to read from. |
| * @param c The code point to look up. |
| * @param column Which of the value columns to read. |
| *            NOTE(review): behavior for an out-of-range c or column is not |
| *            documented here - confirm in the implementation. |
| * @return The uint32_t value of that column, or 0 as described above. |
| */ |
| U_CAPI uint32_t U_EXPORT2 |
| upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column); |
| |
| /* |
| * Get a pointer to the values of the row with the given index. |
| * |
| * pRangeStart and pRangeEnd can be NULL. |
| * NOTE(review): when non-NULL they presumably receive the code point range |
| * of the row; whether *pRangeEnd is the last code point or the limit is |
| * not visible in this header - confirm. |
| * |
| * @param pv The properties vectors object to read from. |
| * @param rowIndex Index of the requested row. |
| * @param pRangeStart Optional output for the start of the row's range. |
| * @param pRangeEnd Optional output for the end of the row's range. |
| * @return NULL if rowIndex out of range and for illegal arguments, |
| * or if called after upvec_compact() |
| */ |
| U_CAPI uint32_t * U_EXPORT2 |
| upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, |
| UChar32 *pRangeStart, UChar32 *pRangeEnd); |
| |
| /* |
| * Compact the vectors: |
| * - modify the memory |
| * - keep only unique vectors |
| * - store them contiguously from the beginning of the memory |
| * - for each (non-unique) row, call the handler function |
| * |
| * The handler's rowIndex is the index of the row in the compacted |
| * memory block. |
| * (Therefore, it starts at 0 and increases in increments of the columns value.) |
| * |
| * In a first phase, only special values are delivered (each exactly once), |
| * with start==end both equalling a special pseudo code point. |
| * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP |
| * where rowIndex is the length of the compacted array, |
| * and the row is arbitrary (but not NULL). |
| * Then, in the second phase, the handler is called for each row of real values. |
| * |
| * Handler parameters (UPVecCompactHandler): |
| * context - the context pointer that was passed to upvec_compact(). |
| * start, end - code point range of the delivered row, or a pseudo code |
| *              point in both as described above. |
| * rowIndex - index into the compacted memory block, see above. |
| * row - pointer to the row values. |
| * columns - number of values per row. |
| * pErrorCode - ICU error code pointer. |
| *              NOTE(review): presumably the handler can report failure |
| *              through it and thereby stop the compaction - confirm. |
| * |
| * After upvec_compact(), the object is no longer writable or queryable per |
| * row: see the notes on upvec_setValue(), upvec_getValue() and upvec_getRow(). |
| */ |
| typedef void U_CALLCONV |
| UPVecCompactHandler(void *context, |
| UChar32 start, UChar32 end, |
| int32_t rowIndex, uint32_t *row, int32_t columns, |
| UErrorCode *pErrorCode); |
| |
| U_CAPI void U_EXPORT2 |
| upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode); |
| |
| /* |
| * Get the vectors array after calling upvec_compact(). |
| * The caller must not modify nor release the returned array. |
| * Returns NULL if called before upvec_compact(). |
| * |
| * @param pv The compacted properties vectors object. |
| * @param pRows Output for the number of rows in the array. |
| * @param pColumns Output for the number of values per row. |
| *            NOTE(review): the meaning of pRows/pColumns is taken from |
| *            their names, and whether they may be NULL is not visible in |
| *            this header - confirm in the implementation. |
| * @return The array, which stays owned by pv (see above), or NULL. |
| */ |
| U_CAPI const uint32_t * U_EXPORT2 |
| upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns); |
| |
| /* |
| * Get a clone of the vectors array after calling upvec_compact(). |
| * The caller owns the returned array and must uprv_free() it. |
| * Returns NULL if called before upvec_compact(). |
| * |
| * @param pv The compacted properties vectors object. |
| * @param pRows Output for the number of rows in the array. |
| * @param pColumns Output for the number of values per row. |
| *            NOTE(review): the meaning of pRows/pColumns is taken from |
| *            their names, and whether they may be NULL is not visible in |
| *            this header - confirm in the implementation. |
| * @param pErrorCode ICU error code pointer. |
| *            NOTE(review): presumably set when the copy cannot be |
| *            allocated - confirm. |
| * @return The caller-owned copy, or NULL. |
| */ |
| U_CAPI uint32_t * U_EXPORT2 |
| upvec_cloneArray(const UPropsVectors *pv, |
| int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode); |
| |
| /* |
| * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted |
| * vectors array, and freeze the trie. |
| * |
| * Because this compacts pv, the post-compaction restrictions documented on |
| * upvec_setValue(), upvec_getValue() and upvec_getRow() apply afterwards. |
| * |
| * @param pv The properties vectors object to compact. |
| * @param pErrorCode ICU error code pointer. |
| * @return The frozen trie. |
| *         NOTE(review): presumably the caller owns it and must close it, |
| *         and NULL is returned on failure - confirm in the implementation. |
| */ |
| U_CAPI UTrie2 * U_EXPORT2 |
| upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode); |
| |
| struct UPVecToUTrie2Context { /* context object for upvec_compactToUTrie2Handler() */ |
| UTrie2 *trie; /* the trie that the handler creates and stores rowIndex values into */ |
| int32_t initialValue; /* NOTE(review): presumably the rowIndex delivered for UPVEC_INITIAL_VALUE_CP - confirm */ |
| int32_t errorValue; /* NOTE(review): presumably the rowIndex delivered for UPVEC_ERROR_VALUE_CP - confirm */ |
| int32_t maxValue; /* NOTE(review): meaning not visible in this header; possibly the largest rowIndex - confirm */ |
| }; |
| typedef struct UPVecToUTrie2Context UPVecToUTrie2Context; |
| |
| /* |
| * Compaction handler that builds a UTrie2 from the delivered rows. |
| * context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values. |
| * |
| * The parameter list is the same as that of UPVecCompactHandler, so this |
| * function can be passed as the handler argument of upvec_compact(), |
| * with context pointing to a UPVecToUTrie2Context. |
| * NOTE(review): how the context must be initialized before the first call |
| * is not visible in this header - confirm in the implementation. |
| */ |
| U_CAPI void U_CALLCONV |
| upvec_compactToUTrie2Handler(void *context, |
| UChar32 start, UChar32 end, |
| int32_t rowIndex, uint32_t *row, int32_t columns, |
| UErrorCode *pErrorCode); |
| |
| U_CDECL_END |
| |
| #endif |