blob: 96872b916844bb7ef9c78f5b1a398aa1ec55206e [file] [log] [blame]
/*
**********************************************************************
* Copyright (c) 2001-2007, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/20/2001 aliu Creation.
**********************************************************************
*/
#ifndef ESCTRN_H
#define ESCTRN_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_TRANSLITERATION
#include "unicode/translit.h"
U_NAMESPACE_BEGIN
/**
* A transliterator that converts Unicode characters to an escape
* form. Examples of escape forms are "U+4E01" and "&#x10FFFF;".
* Escape forms have a prefix and suffix, either of which may be
* empty, a radix, typically 16 or 10, a minimum digit count,
* typically 1, 4, or 8, and a boolean that specifies whether
* supplemental characters are handled as 32-bit code points or as two
* 16-bit code units. Most escape forms handle 32-bit code points,
* but some, such as the Java form, intentionally break them into the
* two surrogates of a surrogate pair, for backward compatibility.
*
* <p>Some escape forms actually have two different patterns, one for
* BMP characters (0..FFFF) and one for supplements (>FFFF). To
* handle this, a second EscapeTransliterator may be defined that
* specifies the pattern to be produced for supplementals. An example
* of a form that requires this is the C form, which uses "\\uFFFF"
* for BMP characters and "\\U0010FFFF" for supplementals.
*
* <p>This class is package private. It registers several standard
* variants with the system which are then accessed via their IDs.
*
* @author Alan Liu
*/
class EscapeTransliterator : public Transliterator {
private:
/**
* The prefix of the escape form; may be empty, but usually isn't.
* This text precedes the digits of each escape.
*/
UnicodeString prefix;
/**
* The suffix of the escape form; often empty.
* This text follows the digits of each escape.
*/
UnicodeString suffix;
/**
* The radix to display the number in. Typically 16 or 10. Must
* be in the range 2 to 36.
*/
int32_t radix;
/**
* The minimum number of digits. Typically 1, 4, or 8. Values
* less than 1 are equivalent to 1.
*/
int32_t minDigits;
/**
* If true, supplementals are handled as 32-bit code points. If
* false, they are handled as two 16-bit code units.
*/
UBool grokSupplementals;
/**
* The form to be used for supplementals. If this is null then
* the same form is used for BMP characters and supplementals. If
* this is not null and if grokSupplementals is true then the
* prefix, suffix, radix, and minDigits of this object are used
* for supplementals. This pointer is owned.
*
* NOTE(review): a copy constructor and a destructor are declared
* below, but no copy assignment operator is declared in this class.
* Because this pointer is owned, confirm that objects of this class
* are never assigned to one another.
*/
EscapeTransliterator* supplementalHandler;
public:
/**
* Registers standard variants with the system. Called by
* Transliterator during initialization.
*/
static void registerIDs();
/**
* Constructs an escape transliterator with the given ID and
* parameters. See the class member documentation for details.
*
* @param ID the ID of this transliterator
* @param prefix text emitted before the digits; may be empty
* @param suffix text emitted after the digits; may be empty
* @param radix the radix of the digits, in the range 2 to 36
* @param minDigits the minimum number of digits to produce
* @param grokSupplementals whether supplementals are handled as
* 32-bit code points rather than as two 16-bit code units
* @param adoptedSupplementalHandler the form to use for
* supplementals, or null to use this form for them as well. The
* parameter name and the member documentation indicate that this
* object takes ownership of the pointer -- confirm against the
* implementation.
*/
EscapeTransliterator(const UnicodeString& ID,
const UnicodeString& prefix, const UnicodeString& suffix,
int32_t radix, int32_t minDigits,
UBool grokSupplementals,
EscapeTransliterator* adoptedSupplementalHandler);
/**
* Copy constructor.
* NOTE(review): since supplementalHandler is owned, this presumably
* makes its own copy of the handler rather than sharing the
* pointer -- confirm against the implementation.
*/
EscapeTransliterator(const EscapeTransliterator&);
/**
* Destructor.
* NOTE(review): presumably releases the owned supplementalHandler --
* confirm against the implementation.
*/
virtual ~EscapeTransliterator();
/**
* Transliterator API.
* @return a Transliterator pointer; presumably a newly allocated copy
* of this object that the caller owns -- confirm against the
* Transliterator::clone documentation.
*/
virtual Transliterator* clone() const;
/**
* ICU "poor man's RTTI", returns a UClassID for the actual class.
*/
virtual UClassID getDynamicClassID() const;
/**
* ICU "poor man's RTTI", returns a UClassID for this class.
*/
U_I18N_API static UClassID U_EXPORT2 getStaticClassID();
protected:
/**
* Implements {@link Transliterator#handleTransliterate}.
*
* @param text the text to be transliterated; modified in place
* (it is passed by non-const reference)
* @param offset the position structure describing the range to
* process; passed by non-const reference, so presumably updated on
* return -- confirm against the Transliterator documentation
* @param isIncremental flag passed through from the Transliterator
* framework; see Transliterator::handleTransliterate for its meaning
*/
virtual void handleTransliterate(Replaceable& text, UTransPosition& offset,
UBool isIncremental) const;
};
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_TRANSLITERATION */
#endif