third_party/icu/source/common/utrie2_impl.h - cobalt - Git at Google

 // © 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
 /*
 ******************************************************************************
 *
 *   Copyright (C) 2001-2008, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
 *   file name:  utrie2_impl.h
 *   encoding:   UTF-8
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2008sep26 (split off from utrie2.c)
 *   created by: Markus W. Scherer
 *
 *   Definitions needed for both runtime and builder code for UTrie2,
 *   used by utrie2.c and utrie2_builder.c.
 */

 #ifndef __UTRIE2_IMPL_H__
 #define __UTRIE2_IMPL_H__

 #ifdef UCPTRIE_DEBUG
 #include "unicode/umutablecptrie.h"
 #endif
 #include "utrie2.h"

 /* Public UTrie2 API implementation ----------------------------------------- */

 /*
  * These definitions are mostly needed by utrie2.cpp,
  * but also by utrie2_serialize() and utrie2_swap().
  */

 // UTrie2 signature values, in platform endianness and opposite endianness.
 // The UTrie2 signature ASCII byte values spell "Tri2".
 #define UTRIE2_SIG      0x54726932
 #define UTRIE2_OE_SIG   0x32697254

 /**
  * Trie data structure in serialized form:
  *
  * UTrie2Header header;
  * uint16_t index[header.indexLength];
  * uint16_t data[header.shiftedDataLength<<2];  -- or uint32_t data[...]
  * @internal
  */
 typedef struct UTrie2Header {
     /** Signature word: "Tri2" as big-endian US-ASCII bytes (0x54726932). */
     uint32_t signature;

     /**
      * Bit field of options:
      *   bits 15..4  reserved (0)
      *   bits  3..0  UTrie2ValueBits valueBits
      */
     uint16_t options;

     /** Index length, within UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH. */
     uint16_t indexLength;

     /**
      * Data length shifted right by UTRIE2_INDEX_SHIFT; before shifting it lies
      * within UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH.
      */
     uint16_t shiftedDataLength;

     /** Null index block offset, not shifted. */
     uint16_t index2NullOffset;

     /** Null data block offset, not shifted. */
     uint16_t dataNullOffset;

     /**
      * Start of the single-value range that ends with U+10ffff:
      * its first code point, rounded up and then shifted right by UTRIE2_SHIFT_1.
      */
     uint16_t shiftedHighStart;
 } UTrie2Header;

 /**
  * Constants for use with UTrie2Header.options.
  * @internal
  */
 enum {
     /** Selects the UTrie2ValueBits valueBits field (bits 3..0) of options. */
     UTRIE2_OPTIONS_VALUE_BITS_MASK = 0x0f
 };

 /* Building a trie ---------------------------------------------------------- */

 /*
  * These definitions are mostly needed by utrie2_builder.c, but also by
  * utrie2_get32() and utrie2_enum().
  */

 enum {
     /**
      * Build-time gap in the index-2 table.
      * The gap begins at offset UTRIE2_INDEX_2_BMP_LENGTH and is at least as long as
      * the maximum lengths of the 2-byte UTF-8 index-2 table and the supplementary
      * index-1 table combined.
      * The length is rounded up to a multiple of UTRIE2_INDEX_2_BLOCK_LENGTH
      * (add the mask, then clear the mask bits) for proper compacting.
      */
     UNEWTRIE2_INDEX_GAP_OFFSET = UTRIE2_INDEX_2_BMP_LENGTH,
     UNEWTRIE2_INDEX_GAP_LENGTH =
         (UTRIE2_UTF8_2B_INDEX_2_LENGTH + UTRIE2_MAX_INDEX_1_LENGTH + UTRIE2_INDEX_2_MASK) &
         ~UTRIE2_INDEX_2_MASK,

     /**
      * Upper bound on the length of the build-time index-2 array, the sum of:
      *  - the number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2,
      *  - the part of the index-2 table for lead surrogate code points,
      *  - the build-time index gap,
      *  - the null index-2 block.
      */
     UNEWTRIE2_MAX_INDEX_2_LENGTH =
         (0x110000 >> UTRIE2_SHIFT_2) +
         UTRIE2_LSCP_INDEX_2_LENGTH +
         UNEWTRIE2_INDEX_GAP_LENGTH +
         UTRIE2_INDEX_2_BLOCK_LENGTH,

     /**
      * Number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_1;
      * dimensions UNewTrie2.index1.
      */
     UNEWTRIE2_INDEX_1_LENGTH = (0x110000 >> UTRIE2_SHIFT_1)
 };

 /**
  * Upper bound on the length of the build-time data array:
  *   0x110000     one entry per code point
  *   0x40 + 0x40  the illegal-UTF-8 block and the null block
  *   0x400        values for the surrogate code units
  */
 #define UNEWTRIE2_MAX_DATA_LENGTH (0x110000 + 0x40 + 0x40 + 0x400)

 /*
  * Build-time trie structure.
  *
  * Just using a boolean flag for "repeat use" could lead to data array overflow
  * because we would not be able to detect when a data block becomes unused.
  * It also leads to orphan data blocks that are kept through serialization.
  *
  * Need to use reference counting for data blocks,
  * and allocDataBlock() needs to look for a free block before increasing dataLength.
  *
  * This scheme seems like overkill for index-2 blocks since the whole index array is
  * preallocated anyway (unlike the growable data array).
  * Just allocating multiple index-2 blocks as needed.
  */
 struct UNewTrie2 {
     /* Index tables, embedded at their maximum build-time lengths. */
     int32_t index1[UNEWTRIE2_INDEX_1_LENGTH];
     int32_t index2[UNEWTRIE2_MAX_INDEX_2_LENGTH];

     /* Data array, held by pointer rather than embedded like the index tables. */
     uint32_t *data;
 #ifdef UCPTRIE_DEBUG
     /* Only present when UCPTRIE_DEBUG is defined. */
     UMutableCPTrie *t3;
 #endif

     uint32_t initialValue;
     uint32_t errorValue;

     int32_t index2Length;
     int32_t dataCapacity;
     int32_t dataLength;

     int32_t firstFreeBlock;

     int32_t index2NullOffset;
     int32_t dataNullOffset;

     UChar32 highStart;
     UBool isCompacted;

     /**
      * Per-data-block table that serves two purposes.
      *
      * Before compacting, it holds reference counters and the free-block list:
      *   0: the block is unused
      *  >0: reference counter (number of index-2 entries pointing here)
      *  <0: next free data block in the free-block list
      *
      * While compacting, compactData() and compactIndex2() use it as a map
      * from original indexes to the adjusted, new ones.
      */
     int32_t map[UNEWTRIE2_MAX_DATA_LENGTH >> UTRIE2_SHIFT_2];
 };

 #endif
	// © 2016 and later: Unicode, Inc. and others.
	// License & terms of use: http://www.unicode.org/copyright.html
	/*
	******************************************************************************
	*
	* Copyright (C) 2001-2008, International Business Machines
	* Corporation and others. All Rights Reserved.
	*
	******************************************************************************
	* file name: utrie2_impl.h
	* encoding: UTF-8
	* tab size: 8 (not used)
	* indentation:4
	*
	* created on: 2008sep26 (split off from utrie2.c)
	* created by: Markus W. Scherer
	*
	* Definitions needed for both runtime and builder code for UTrie2,
	* used by utrie2.c and utrie2_builder.c.
	*/

	#ifndef __UTRIE2_IMPL_H__
	#define __UTRIE2_IMPL_H__

	#ifdef UCPTRIE_DEBUG
	#include "unicode/umutablecptrie.h"
	#endif
	#include "utrie2.h"

	/* Public UTrie2 API implementation ----------------------------------------- */

	/*
	* These definitions are mostly needed by utrie2.cpp,
	* but also by utrie2_serialize() and utrie2_swap().
	*/

	// UTrie2 signature values, in platform endianness and opposite endianness.
	// The UTrie2 signature ASCII byte values spell "Tri2".
	#define UTRIE2_SIG 0x54726932
	#define UTRIE2_OE_SIG 0x32697254

	/**
	* Trie data structure in serialized form:
	*
	* UTrie2Header header;
	* uint16_t index[header.indexLength];
	* uint16_t data[header.shiftedDataLength<<2]; -- or uint32_t data[...]
	* @internal
	*/
	typedef struct UTrie2Header {
	    /** Signature word: "Tri2" as big-endian US-ASCII bytes (0x54726932). */
	    uint32_t signature;

	    /**
	     * Bit field of options:
	     *   bits 15..4  reserved (0)
	     *   bits  3..0  UTrie2ValueBits valueBits
	     */
	    uint16_t options;

	    /** Index length, within UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH. */
	    uint16_t indexLength;

	    /**
	     * Data length shifted right by UTRIE2_INDEX_SHIFT; before shifting it lies
	     * within UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH.
	     */
	    uint16_t shiftedDataLength;

	    /** Null index block offset, not shifted. */
	    uint16_t index2NullOffset;

	    /** Null data block offset, not shifted. */
	    uint16_t dataNullOffset;

	    /**
	     * Start of the single-value range that ends with U+10ffff:
	     * its first code point, rounded up and then shifted right by UTRIE2_SHIFT_1.
	     */
	    uint16_t shiftedHighStart;
	} UTrie2Header;

	/**
	* Constants for use with UTrie2Header.options.
	* @internal
	*/
	enum {
	    /** Selects the UTrie2ValueBits valueBits field (bits 3..0) of options. */
	    UTRIE2_OPTIONS_VALUE_BITS_MASK = 0x0f
	};

	/* Building a trie ---------------------------------------------------------- */

	/*
	* These definitions are mostly needed by utrie2_builder.c, but also by
	* utrie2_get32() and utrie2_enum().
	*/

	enum {
	    /**
	     * Build-time gap in the index-2 table.
	     * The gap begins at offset UTRIE2_INDEX_2_BMP_LENGTH and is at least as long as
	     * the maximum lengths of the 2-byte UTF-8 index-2 table and the supplementary
	     * index-1 table combined.
	     * The length is rounded up to a multiple of UTRIE2_INDEX_2_BLOCK_LENGTH
	     * (add the mask, then clear the mask bits) for proper compacting.
	     */
	    UNEWTRIE2_INDEX_GAP_OFFSET = UTRIE2_INDEX_2_BMP_LENGTH,
	    UNEWTRIE2_INDEX_GAP_LENGTH =
	        (UTRIE2_UTF8_2B_INDEX_2_LENGTH + UTRIE2_MAX_INDEX_1_LENGTH + UTRIE2_INDEX_2_MASK) &
	        ~UTRIE2_INDEX_2_MASK,

	    /**
	     * Upper bound on the length of the build-time index-2 array, the sum of:
	     *  - the number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2,
	     *  - the part of the index-2 table for lead surrogate code points,
	     *  - the build-time index gap,
	     *  - the null index-2 block.
	     */
	    UNEWTRIE2_MAX_INDEX_2_LENGTH =
	        (0x110000 >> UTRIE2_SHIFT_2) +
	        UTRIE2_LSCP_INDEX_2_LENGTH +
	        UNEWTRIE2_INDEX_GAP_LENGTH +
	        UTRIE2_INDEX_2_BLOCK_LENGTH,

	    /**
	     * Number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_1;
	     * dimensions UNewTrie2.index1.
	     */
	    UNEWTRIE2_INDEX_1_LENGTH = (0x110000 >> UTRIE2_SHIFT_1)
	};

	/**
	 * Upper bound on the length of the build-time data array:
	 *   0x110000     one entry per code point
	 *   0x40 + 0x40  the illegal-UTF-8 block and the null block
	 *   0x400        values for the surrogate code units
	 */
	#define UNEWTRIE2_MAX_DATA_LENGTH (0x110000 + 0x40 + 0x40 + 0x400)

	/*
	* Build-time trie structure.
	*
	* Just using a boolean flag for "repeat use" could lead to data array overflow
	* because we would not be able to detect when a data block becomes unused.
	* It also leads to orphan data blocks that are kept through serialization.
	*
	* Need to use reference counting for data blocks,
	* and allocDataBlock() needs to look for a free block before increasing dataLength.
	*
	* This scheme seems like overkill for index-2 blocks since the whole index array is
	* preallocated anyway (unlike the growable data array).
	* Just allocating multiple index-2 blocks as needed.
	*/
	struct UNewTrie2 {
	    /* Index tables, embedded at their maximum build-time lengths. */
	    int32_t index1[UNEWTRIE2_INDEX_1_LENGTH];
	    int32_t index2[UNEWTRIE2_MAX_INDEX_2_LENGTH];

	    /* Data array, held by pointer rather than embedded like the index tables. */
	    uint32_t *data;
	#ifdef UCPTRIE_DEBUG
	    /* Only present when UCPTRIE_DEBUG is defined. */
	    UMutableCPTrie *t3;
	#endif

	    uint32_t initialValue;
	    uint32_t errorValue;

	    int32_t index2Length;
	    int32_t dataCapacity;
	    int32_t dataLength;

	    int32_t firstFreeBlock;

	    int32_t index2NullOffset;
	    int32_t dataNullOffset;

	    UChar32 highStart;
	    UBool isCompacted;

	    /**
	     * Per-data-block table that serves two purposes.
	     *
	     * Before compacting, it holds reference counters and the free-block list:
	     *   0: the block is unused
	     *  >0: reference counter (number of index-2 entries pointing here)
	     *  <0: next free data block in the free-block list
	     *
	     * While compacting, compactData() and compactIndex2() use it as a map
	     * from original indexes to the adjusted, new ones.
	     */
	    int32_t map[UNEWTRIE2_MAX_DATA_LENGTH >> UTRIE2_SHIFT_2];
	};

	#endif