src/third_party/mozjs-45/js/public/CharacterEncoding.h - cobalt - Git at Google

 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  * vim: set ts=8 sts=4 et sw=4 tw=99:
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

 #ifndef js_CharacterEncoding_h
 #define js_CharacterEncoding_h

 #include "mozilla/Range.h"

 #include "js/TypeDecls.h"
 #include "js/Utility.h"

 namespace js {
 class ExclusiveContext;
 } // namespace js

 class JSFlatString;

 namespace JS {

 /*
  * By default, all C/C++ 1-byte-per-character strings passed into the JSAPI
  * are treated as ISO/IEC 8859-1, also known as Latin-1. That is, each
  * byte is treated as a 2-byte character, and there is no way to pass in a
  * string containing characters beyond U+00FF.
  */
 class Latin1Chars : public mozilla::Range<Latin1Char>
 {
     typedef mozilla::Range<Latin1Char> Base;

   public:
     Latin1Chars() : Base() {}
     Latin1Chars(char* aBytes, size_t aLength) : Base(reinterpret_cast<Latin1Char*>(aBytes), aLength) {}
     Latin1Chars(const Latin1Char* aBytes, size_t aLength)
       : Base(const_cast<Latin1Char*>(aBytes), aLength)
     {}
     Latin1Chars(const char* aBytes, size_t aLength)
       : Base(reinterpret_cast<Latin1Char*>(const_cast<char*>(aBytes)), aLength)
     {}
 };

 /*
  * A Latin1Chars, but with \0 termination for C compatibility.
  */
 class Latin1CharsZ : public mozilla::RangedPtr<Latin1Char>
 {
     typedef mozilla::RangedPtr<Latin1Char> Base;

   public:
     Latin1CharsZ() : Base(nullptr, 0) {}

     Latin1CharsZ(char* aBytes, size_t aLength)
       : Base(reinterpret_cast<Latin1Char*>(aBytes), aLength)
     {
         MOZ_ASSERT(aBytes[aLength] == '\0');
     }

     Latin1CharsZ(Latin1Char* aBytes, size_t aLength)
       : Base(aBytes, aLength)
     {
         MOZ_ASSERT(aBytes[aLength] == '\0');
     }

     using Base::operator=;

     char* c_str() { return reinterpret_cast<char*>(get()); }
 };

 class UTF8Chars : public mozilla::Range<unsigned char>
 {
     typedef mozilla::Range<unsigned char> Base;

   public:
     UTF8Chars() : Base() {}
     UTF8Chars(char* aBytes, size_t aLength)
       : Base(reinterpret_cast<unsigned char*>(aBytes), aLength)
     {}
     UTF8Chars(const char* aBytes, size_t aLength)
       : Base(reinterpret_cast<unsigned char*>(const_cast<char*>(aBytes)), aLength)
     {}
 };

 /*
  * SpiderMonkey also deals directly with UTF-8 encoded text in some places.
  */
 class UTF8CharsZ : public mozilla::RangedPtr<unsigned char>
 {
     typedef mozilla::RangedPtr<unsigned char> Base;

   public:
     UTF8CharsZ() : Base(nullptr, 0) {}

     UTF8CharsZ(char* aBytes, size_t aLength)
       : Base(reinterpret_cast<unsigned char*>(aBytes), aLength)
     {
         MOZ_ASSERT(aBytes[aLength] == '\0');
     }

     UTF8CharsZ(unsigned char* aBytes, size_t aLength)
       : Base(aBytes, aLength)
     {
         MOZ_ASSERT(aBytes[aLength] == '\0');
     }

     using Base::operator=;

     char* c_str() { return reinterpret_cast<char*>(get()); }
 };

 /*
  * SpiderMonkey uses a 2-byte character representation: it is a
  * 2-byte-at-a-time view of a UTF-16 byte stream. This is similar to UCS-2,
  * but unlike UCS-2, we do not strip UTF-16 extension bytes. This allows a
  * sufficiently dedicated JavaScript program to be fully unicode-aware by
  * manually interpreting UTF-16 extension characters embedded in the JS
  * string.
  */
 class TwoByteChars : public mozilla::Range<char16_t>
 {
     typedef mozilla::Range<char16_t> Base;

   public:
     TwoByteChars() : Base() {}
     TwoByteChars(char16_t* aChars, size_t aLength) : Base(aChars, aLength) {}
     TwoByteChars(const char16_t* aChars, size_t aLength) : Base(const_cast<char16_t*>(aChars), aLength) {}
 };

 /*
  * A TwoByteChars, but \0 terminated for compatibility with JSFlatString.
  */
 class TwoByteCharsZ : public mozilla::RangedPtr<char16_t>
 {
     typedef mozilla::RangedPtr<char16_t> Base;

   public:
     TwoByteCharsZ() : Base(nullptr, 0) {}

     TwoByteCharsZ(char16_t* chars, size_t length)
       : Base(chars, length)
     {
         MOZ_ASSERT(chars[length] == '\0');
     }

     using Base::operator=;
 };

 typedef mozilla::RangedPtr<const char16_t> ConstCharPtr;

 /*
  * Like TwoByteChars, but the chars are const.
  */
 class ConstTwoByteChars : public mozilla::Range<const char16_t>
 {
     typedef mozilla::Range<const char16_t> Base;

   public:
     ConstTwoByteChars() : Base() {}
     ConstTwoByteChars(const char16_t* aChars, size_t aLength) : Base(aChars, aLength) {}
 };

 /*
  * Convert a 2-byte character sequence to "ISO-Latin-1". This works by
  * truncating each 2-byte pair in the sequence to a 1-byte pair. If the source
  * contains any UTF-16 extension characters, then this may give invalid Latin1
  * output. The returned string is zero terminated. The returned string or the
  * returned string's |start()| must be freed with JS_free or js_free,
  * respectively. If allocation fails, an OOM error will be set and the method
  * will return a nullptr chars (which can be tested for with the ! operator).
  * This method cannot trigger GC.
  */
 extern Latin1CharsZ
 LossyTwoByteCharsToNewLatin1CharsZ(js::ExclusiveContext* cx,
                                    const mozilla::Range<const char16_t> tbchars);

 template <typename CharT>
 extern UTF8CharsZ
 CharsToNewUTF8CharsZ(js::ExclusiveContext* maybeCx, const mozilla::Range<const CharT> chars);

 uint32_t
 Utf8ToOneUcs4Char(const uint8_t* utf8Buffer, int utf8Length);

 /*
  * Inflate bytes in UTF-8 encoding to char16_t.
  * - On error, returns an empty TwoByteCharsZ.
  * - On success, returns a malloc'd TwoByteCharsZ, and updates |outlen| to hold
  *   its length;  the length value excludes the trailing null.
  */
 extern TwoByteCharsZ
 UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);

 /*
  * The same as UTF8CharsToNewTwoByteCharsZ(), except that any malformed UTF-8 characters
  * will be replaced by \uFFFD. No exception will be thrown for malformed UTF-8
  * input.
  */
 extern TwoByteCharsZ
 LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);

 /*
  * Returns the length of the char buffer required to encode |s| as UTF8.
  * Does not include the null-terminator.
  */
 JS_PUBLIC_API(size_t)
 GetDeflatedUTF8StringLength(JSFlatString* s);

 /*
  * Encode |src| as UTF8. The caller must ensure |dst| has enough space.
  * Does not write the null terminator.
  */
 JS_PUBLIC_API(void)
 DeflateStringToUTF8Buffer(JSFlatString* src, mozilla::RangedPtr<char> dst);

 } // namespace JS

 inline void JS_free(JS::Latin1CharsZ& ptr) { js_free((void*)ptr.get()); }
 inline void JS_free(JS::UTF8CharsZ& ptr) { js_free((void*)ptr.get()); }

 #endif /* js_CharacterEncoding_h */
	/* -- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 --
	* vim: set ts=8 sts=4 et sw=4 tw=99:
	* This Source Code Form is subject to the terms of the Mozilla Public
	* License, v. 2.0. If a copy of the MPL was not distributed with this
	* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

	#ifndef js_CharacterEncoding_h
	#define js_CharacterEncoding_h

	#include "mozilla/Range.h"

	#include "js/TypeDecls.h"
	#include "js/Utility.h"

	namespace js {
	class ExclusiveContext;
	} // namespace js

	class JSFlatString;

	namespace JS {

	/*
	* By default, all C/C++ 1-byte-per-character strings passed into the JSAPI
	* are treated as ISO/IEC 8859-1, also known as Latin-1. That is, each
	* byte is treated as a 2-byte character, and there is no way to pass in a
	* string containing characters beyond U+00FF.
	*/
	class Latin1Chars : public mozilla::Range<Latin1Char>
	{
	typedef mozilla::Range<Latin1Char> Base;

	public:
	Latin1Chars() : Base() {}
	Latin1Chars(char* aBytes, size_t aLength) : Base(reinterpret_cast<Latin1Char*>(aBytes), aLength) {}
	Latin1Chars(const Latin1Char* aBytes, size_t aLength)
	: Base(const_cast<Latin1Char*>(aBytes), aLength)
	{}
	Latin1Chars(const char* aBytes, size_t aLength)
	: Base(reinterpret_cast<Latin1Char>(const_cast<char>(aBytes)), aLength)
	{}
	};

	/*
	* A Latin1Chars, but with \0 termination for C compatibility.
	*/
	class Latin1CharsZ : public mozilla::RangedPtr<Latin1Char>
	{
	typedef mozilla::RangedPtr<Latin1Char> Base;

	public:
	Latin1CharsZ() : Base(nullptr, 0) {}

	Latin1CharsZ(char* aBytes, size_t aLength)
	: Base(reinterpret_cast<Latin1Char*>(aBytes), aLength)
	{
	MOZ_ASSERT(aBytes[aLength] == '\0');
	}

	Latin1CharsZ(Latin1Char* aBytes, size_t aLength)
	: Base(aBytes, aLength)
	{
	MOZ_ASSERT(aBytes[aLength] == '\0');
	}

	using Base::operator=;

	char* c_str() { return reinterpret_cast<char*>(get()); }
	};

	class UTF8Chars : public mozilla::Range<unsigned char>
	{
	typedef mozilla::Range<unsigned char> Base;

	public:
	UTF8Chars() : Base() {}
	UTF8Chars(char* aBytes, size_t aLength)
	: Base(reinterpret_cast<unsigned char*>(aBytes), aLength)
	{}
	UTF8Chars(const char* aBytes, size_t aLength)
	: Base(reinterpret_cast<unsigned char>(const_cast<char>(aBytes)), aLength)
	{}
	};

	/*
	* SpiderMonkey also deals directly with UTF-8 encoded text in some places.
	*/
	class UTF8CharsZ : public mozilla::RangedPtr<unsigned char>
	{
	typedef mozilla::RangedPtr<unsigned char> Base;

	public:
	UTF8CharsZ() : Base(nullptr, 0) {}

	UTF8CharsZ(char* aBytes, size_t aLength)
	: Base(reinterpret_cast<unsigned char*>(aBytes), aLength)
	{
	MOZ_ASSERT(aBytes[aLength] == '\0');
	}

	UTF8CharsZ(unsigned char* aBytes, size_t aLength)
	: Base(aBytes, aLength)
	{
	MOZ_ASSERT(aBytes[aLength] == '\0');
	}

	using Base::operator=;

	char* c_str() { return reinterpret_cast<char*>(get()); }
	};

	/*
	* SpiderMonkey uses a 2-byte character representation: it is a
	* 2-byte-at-a-time view of a UTF-16 byte stream. This is similar to UCS-2,
	* but unlike UCS-2, we do not strip UTF-16 extension bytes. This allows a
	* sufficiently dedicated JavaScript program to be fully unicode-aware by
	* manually interpreting UTF-16 extension characters embedded in the JS
	* string.
	*/
	class TwoByteChars : public mozilla::Range<char16_t>
	{
	typedef mozilla::Range<char16_t> Base;

	public:
	TwoByteChars() : Base() {}
	TwoByteChars(char16_t* aChars, size_t aLength) : Base(aChars, aLength) {}
	TwoByteChars(const char16_t* aChars, size_t aLength) : Base(const_cast<char16_t*>(aChars), aLength) {}
	};

	/*
	* A TwoByteChars, but \0 terminated for compatibility with JSFlatString.
	*/
	class TwoByteCharsZ : public mozilla::RangedPtr<char16_t>
	{
	typedef mozilla::RangedPtr<char16_t> Base;

	public:
	TwoByteCharsZ() : Base(nullptr, 0) {}

	TwoByteCharsZ(char16_t* chars, size_t length)
	: Base(chars, length)
	{
	MOZ_ASSERT(chars[length] == '\0');
	}

	using Base::operator=;
	};

	typedef mozilla::RangedPtr<const char16_t> ConstCharPtr;

	/*
	* Like TwoByteChars, but the chars are const.
	*/
	class ConstTwoByteChars : public mozilla::Range<const char16_t>
	{
	typedef mozilla::Range<const char16_t> Base;

	public:
	ConstTwoByteChars() : Base() {}
	ConstTwoByteChars(const char16_t* aChars, size_t aLength) : Base(aChars, aLength) {}
	};

	/*
	* Convert a 2-byte character sequence to "ISO-Latin-1". This works by
	* truncating each 2-byte pair in the sequence to a 1-byte pair. If the source
	* contains any UTF-16 extension characters, then this may give invalid Latin1
	* output. The returned string is zero terminated. The returned string or the
	* returned string's \|start()\| must be freed with JS_free or js_free,
	* respectively. If allocation fails, an OOM error will be set and the method
	* will return a nullptr chars (which can be tested for with the ! operator).
	* This method cannot trigger GC.
	*/
	extern Latin1CharsZ
	LossyTwoByteCharsToNewLatin1CharsZ(js::ExclusiveContext* cx,
	const mozilla::Range<const char16_t> tbchars);

	template <typename CharT>
	extern UTF8CharsZ
	CharsToNewUTF8CharsZ(js::ExclusiveContext* maybeCx, const mozilla::Range<const CharT> chars);

	uint32_t
	Utf8ToOneUcs4Char(const uint8_t* utf8Buffer, int utf8Length);

	/*
	* Inflate bytes in UTF-8 encoding to char16_t.
	* - On error, returns an empty TwoByteCharsZ.
	* - On success, returns a malloc'd TwoByteCharsZ, and updates \|outlen\| to hold
	* its length; the length value excludes the trailing null.
	*/
	extern TwoByteCharsZ
	UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);

	/*
	* The same as UTF8CharsToNewTwoByteCharsZ(), except that any malformed UTF-8 characters
	* will be replaced by \uFFFD. No exception will be thrown for malformed UTF-8
	* input.
	*/
	extern TwoByteCharsZ
	LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);

	/*
	* Returns the length of the char buffer required to encode \|s\| as UTF8.
	* Does not include the null-terminator.
	*/
	JS_PUBLIC_API(size_t)
	GetDeflatedUTF8StringLength(JSFlatString* s);

	/*
	* Encode \|src\| as UTF8. The caller must ensure \|dst\| has enough space.
	* Does not write the null terminator.
	*/
	JS_PUBLIC_API(void)
	DeflateStringToUTF8Buffer(JSFlatString* src, mozilla::RangedPtr<char> dst);

	} // namespace JS

	inline void JS_free(JS::Latin1CharsZ& ptr) { js_free((void*)ptr.get()); }
	inline void JS_free(JS::UTF8CharsZ& ptr) { js_free((void*)ptr.get()); }

	#endif /* js_CharacterEncoding_h */