src/third_party/mozjs-45/js/src/vm/CharacterEncoding.cpp - cobalt - Git at Google

 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  * vim: set ts=8 sts=4 et sw=4 tw=99:
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

 #include "js/CharacterEncoding.h"

 #include "mozilla/Range.h"

 #include "jscntxt.h"
 #include "jsprf.h"

 using namespace js;

 Latin1CharsZ
 JS::LossyTwoByteCharsToNewLatin1CharsZ(js::ExclusiveContext* cx,
                                        const mozilla::Range<const char16_t> tbchars)
 {
     MOZ_ASSERT(cx);
     size_t len = tbchars.length();
     unsigned char* latin1 = cx->pod_malloc<unsigned char>(len + 1);
     if (!latin1)
         return Latin1CharsZ();
     for (size_t i = 0; i < len; ++i)
         latin1[i] = static_cast<unsigned char>(tbchars[i]);
     latin1[len] = '\0';
     return Latin1CharsZ(latin1, len);
 }

 template <typename CharT>
 static size_t
 GetDeflatedUTF8StringLength(const CharT* chars, size_t nchars)
 {
     size_t nbytes = nchars;
     for (const CharT* end = chars + nchars; chars < end; chars++) {
         char16_t c = *chars;
         if (c < 0x80)
             continue;
         uint32_t v;
         if (0xD800 <= c && c <= 0xDFFF) {
             /* nbytes sets 1 length since this is surrogate pair. */
             if (c >= 0xDC00 || (chars + 1) == end) {
                 nbytes += 2; /* Bad Surrogate */
                 continue;
             }
             char16_t c2 = chars[1];
             if (c2 < 0xDC00 || c2 > 0xDFFF) {
                 nbytes += 2; /* Bad Surrogate */
                 continue;
             }
             v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
             nbytes--;
             chars++;
         } else {
             v = c;
         }
         v >>= 11;
         nbytes++;
         while (v) {
             v >>= 5;
             nbytes++;
         }
     }
     return nbytes;
 }

 JS_PUBLIC_API(size_t)
 JS::GetDeflatedUTF8StringLength(JSFlatString* s)
 {
     JS::AutoCheckCannotGC nogc;
     return s->hasLatin1Chars()
            ? ::GetDeflatedUTF8StringLength(s->latin1Chars(nogc), s->length())
            : ::GetDeflatedUTF8StringLength(s->twoByteChars(nogc), s->length());
 }

 static void
 PutUTF8ReplacementCharacter(mozilla::RangedPtr<char>& dst)
 {
     *dst++ = char(0xEF);
     *dst++ = char(0xBF);
     *dst++ = char(0xBD);
 }

 template <typename CharT>
 static void
 DeflateStringToUTF8Buffer(const CharT* src, size_t srclen, mozilla::RangedPtr<char> dst)
 {
     while (srclen) {
         uint32_t v;
         char16_t c = *src++;
         srclen--;
         if (c >= 0xDC00 && c <= 0xDFFF) {
             PutUTF8ReplacementCharacter(dst);
             continue;
         } else if (c < 0xD800 || c > 0xDBFF) {
             v = c;
         } else {
             if (srclen < 1) {
                 PutUTF8ReplacementCharacter(dst);
                 continue;
             }
             char16_t c2 = *src;
             if ((c2 < 0xDC00) || (c2 > 0xDFFF)) {
                 PutUTF8ReplacementCharacter(dst);
                 continue;
             }
             src++;
             srclen--;
             v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
         }
         size_t utf8Len;
         if (v < 0x0080) {
             /* no encoding necessary - performance hack */
             *dst++ = char(v);
             utf8Len = 1;
         } else {
             uint8_t utf8buf[4];
             utf8Len = OneUcs4ToUtf8Char(utf8buf, v);
             for (size_t i = 0; i < utf8Len; i++)
                 *dst++ = char(utf8buf[i]);
         }
     }
 }

 JS_PUBLIC_API(void)
 JS::DeflateStringToUTF8Buffer(JSFlatString* src, mozilla::RangedPtr<char> dst)
 {
     JS::AutoCheckCannotGC nogc;
     return src->hasLatin1Chars()
            ? ::DeflateStringToUTF8Buffer(src->latin1Chars(nogc), src->length(), dst)
            : ::DeflateStringToUTF8Buffer(src->twoByteChars(nogc), src->length(), dst);
 }

 template <typename CharT>
 UTF8CharsZ
 JS::CharsToNewUTF8CharsZ(js::ExclusiveContext* maybeCx, const mozilla::Range<const CharT> chars)
 {
     /* Get required buffer size. */
     const CharT* str = chars.start().get();
     size_t len = ::GetDeflatedUTF8StringLength(str, chars.length());

     /* Allocate buffer. */
     char* utf8;
     if (maybeCx)
         utf8 = maybeCx->pod_malloc<char>(len + 1);
     else
         utf8 = js_pod_malloc<char>(len + 1);
     if (!utf8)
         return UTF8CharsZ();

     /* Encode to UTF8. */
     ::DeflateStringToUTF8Buffer(str, chars.length(), mozilla::RangedPtr<char>(utf8, len));
     utf8[len] = '\0';

     return UTF8CharsZ(utf8, len);
 }

 template UTF8CharsZ
 JS::CharsToNewUTF8CharsZ(js::ExclusiveContext* maybeCx,
                          const mozilla::Range<const Latin1Char> chars);

 template UTF8CharsZ
 JS::CharsToNewUTF8CharsZ(js::ExclusiveContext* maybeCx,
                          const mozilla::Range<const char16_t> chars);

 static const uint32_t INVALID_UTF8 = UINT32_MAX;

 /*
  * Convert a utf8 character sequence into a UCS-4 character and return that
  * character.  It is assumed that the caller already checked that the sequence
  * is valid.
  */
 uint32_t
 JS::Utf8ToOneUcs4Char(const uint8_t* utf8Buffer, int utf8Length)
 {
     MOZ_ASSERT(1 <= utf8Length && utf8Length <= 4);

     if (utf8Length == 1) {
         MOZ_ASSERT(!(*utf8Buffer & 0x80));
         return *utf8Buffer;
     }

     /* from Unicode 3.1, non-shortest form is illegal */
     static const uint32_t minucs4Table[] = { 0x80, 0x800, 0x10000 };

     MOZ_ASSERT((*utf8Buffer & (0x100 - (1 << (7 - utf8Length)))) ==
                (0x100 - (1 << (8 - utf8Length))));
     uint32_t ucs4Char = *utf8Buffer++ & ((1 << (7 - utf8Length)) - 1);
     uint32_t minucs4Char = minucs4Table[utf8Length - 2];
     while (--utf8Length) {
         MOZ_ASSERT((*utf8Buffer & 0xC0) == 0x80);
         ucs4Char = (ucs4Char << 6) | (*utf8Buffer++ & 0x3F);
     }

     if (MOZ_UNLIKELY(ucs4Char < minucs4Char || (ucs4Char >= 0xD800 && ucs4Char <= 0xDFFF)))
         return INVALID_UTF8;

     return ucs4Char;
 }

 static void
 ReportInvalidCharacter(JSContext* cx, uint32_t offset)
 {
     char buffer[10];
     JS_snprintf(buffer, 10, "%d", offset);
     JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR, GetErrorMessage, nullptr,
                                  JSMSG_MALFORMED_UTF8_CHAR, buffer);
 }

 static void
 ReportBufferTooSmall(JSContext* cx, uint32_t dummy)
 {
     JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_BUFFER_TOO_SMALL);
 }

 static void
 ReportTooBigCharacter(JSContext* cx, uint32_t v)
 {
     char buffer[10];
     JS_snprintf(buffer, 10, "0x%x", v + 0x10000);
     JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR, GetErrorMessage, nullptr,
                                  JSMSG_UTF8_CHAR_TOO_LARGE, buffer);
 }

 enum InflateUTF8Action {
     CountAndReportInvalids,
     CountAndIgnoreInvalids,
     Copy
 };

 static const uint32_t REPLACE_UTF8 = 0xFFFD;

 // If making changes to this algorithm, make sure to also update
 // LossyConvertUTF8toUTF16() in dom/wifi/WifiUtils.cpp
 template <InflateUTF8Action action>
 static bool
 InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, char16_t* dst, size_t* dstlenp,
                           bool* isAsciip)
 {
     *isAsciip = true;

     // Count how many char16_t characters need to be in the inflated string.
     // |i| is the index into |src|, and |j| is the the index into |dst|.
     size_t srclen = src.length();
     uint32_t j = 0;
     for (uint32_t i = 0; i < srclen; i++, j++) {
         uint32_t v = uint32_t(src[i]);
         if (!(v & 0x80)) {
             // ASCII code unit.  Simple copy.
             if (action == Copy)
                 dst[j] = char16_t(v);

         } else {
             // Non-ASCII code unit.  Determine its length in bytes (n).
             *isAsciip = false;
             uint32_t n = 1;
             while (v & (0x80 >> n))
                 n++;

         #define INVALID(report, arg, n2)                                \
             do {                                                        \
                 if (action == CountAndReportInvalids) {                 \
                     report(cx, arg);                                    \
                     return false;                                       \
                 } else {                                                \
                     if (action == Copy)                                 \
                         dst[j] = char16_t(REPLACE_UTF8);                \
                     else                                                \
                         MOZ_ASSERT(action == CountAndIgnoreInvalids);   \
                     n = n2;                                             \
                     goto invalidMultiByteCodeUnit;                      \
                 }                                                       \
             } while (0)

             // Check the leading byte.
             if (n < 2 || n > 4)
                 INVALID(ReportInvalidCharacter, i, 1);

             // Check that |src| is large enough to hold an n-byte code unit.
             if (i + n > srclen)
                 INVALID(ReportBufferTooSmall, /* dummy = */ 0, 1);

             // Check the second byte.  From Unicode Standard v6.2, Table 3-7
             // Well-Formed UTF-8 Byte Sequences.
             if ((v == 0xE0 && ((uint8_t)src[i + 1] & 0xE0) != 0xA0) ||  // E0 A0~BF
                 (v == 0xED && ((uint8_t)src[i + 1] & 0xE0) != 0x80) ||  // ED 80~9F
                 (v == 0xF0 && ((uint8_t)src[i + 1] & 0xF0) == 0x80) ||  // F0 90~BF
                 (v == 0xF4 && ((uint8_t)src[i + 1] & 0xF0) != 0x80))    // F4 80~8F
             {
                 INVALID(ReportInvalidCharacter, i, 1);
             }

             // Check the continuation bytes.
             for (uint32_t m = 1; m < n; m++)
                 if ((src[i + m] & 0xC0) != 0x80)
                     INVALID(ReportInvalidCharacter, i, m);

             // Determine the code unit's length in char16_t and act accordingly.
             v = JS::Utf8ToOneUcs4Char((uint8_t*)&src[i], n);
             if (v < 0x10000) {
                 // The n-byte UTF8 code unit will fit in a single char16_t.
                 if (action == Copy)
                     dst[j] = char16_t(v);

             } else {
                 v -= 0x10000;
                 if (v <= 0xFFFFF) {
                     // The n-byte UTF8 code unit will fit in two char16_t units.
                     if (action == Copy)
                         dst[j] = char16_t((v >> 10) + 0xD800);
                     j++;
                     if (action == Copy)
                         dst[j] = char16_t((v & 0x3FF) + 0xDC00);

                 } else {
                     // The n-byte UTF8 code unit won't fit in two char16_t units.
                     INVALID(ReportTooBigCharacter, v, 1);
                 }
             }

           invalidMultiByteCodeUnit:
             // Move i to the last byte of the multi-byte code unit;  the loop
             // header will do the final i++ to move to the start of the next
             // code unit.
             i += n - 1;
         }
     }

     *dstlenp = j;

     return true;
 }

 typedef bool (*CountAction)(JSContext*, const UTF8Chars, char16_t*, size_t*, bool* isAsciip);

 static TwoByteCharsZ
 InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, CountAction countAction, size_t* outlen)
 {
     *outlen = 0;

     bool isAscii;
     if (!countAction(cx, src, /* dst = */ nullptr, outlen, &isAscii))
         return TwoByteCharsZ();

     char16_t* dst = cx->pod_malloc<char16_t>(*outlen + 1);  // +1 for NUL
     if (!dst) {
         ReportOutOfMemory(cx);
         return TwoByteCharsZ();
     }

     if (isAscii) {
         size_t srclen = src.length();
         MOZ_ASSERT(*outlen == srclen);
         for (uint32_t i = 0; i < srclen; i++)
             dst[i] = char16_t(src[i]);

     } else {
         JS_ALWAYS_TRUE(InflateUTF8StringToBuffer<Copy>(cx, src, dst, outlen, &isAscii));
     }

     dst[*outlen] = 0;    // NUL char

     return TwoByteCharsZ(dst, *outlen);
 }

 TwoByteCharsZ
 JS::UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
 {
     return InflateUTF8StringHelper(cx, utf8, InflateUTF8StringToBuffer<CountAndReportInvalids>,
                                    outlen);
 }

 TwoByteCharsZ
 JS::LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
 {
     return InflateUTF8StringHelper(cx, utf8, InflateUTF8StringToBuffer<CountAndIgnoreInvalids>,
                                    outlen);
 }
	/* -- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 --
	* vim: set ts=8 sts=4 et sw=4 tw=99:
	* This Source Code Form is subject to the terms of the Mozilla Public
	* License, v. 2.0. If a copy of the MPL was not distributed with this
	* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

	#include "js/CharacterEncoding.h"

	#include "mozilla/Range.h"

	#include "jscntxt.h"
	#include "jsprf.h"

	using namespace js;

	Latin1CharsZ
	JS::LossyTwoByteCharsToNewLatin1CharsZ(js::ExclusiveContext* cx,
	const mozilla::Range<const char16_t> tbchars)
	{
	MOZ_ASSERT(cx);
	size_t len = tbchars.length();
	unsigned char* latin1 = cx->pod_malloc<unsigned char>(len + 1);
	if (!latin1)
	return Latin1CharsZ();
	for (size_t i = 0; i < len; ++i)
	latin1[i] = static_cast<unsigned char>(tbchars[i]);
	latin1[len] = '\0';
	return Latin1CharsZ(latin1, len);
	}

	template <typename CharT>
	static size_t
	GetDeflatedUTF8StringLength(const CharT* chars, size_t nchars)
	{
	size_t nbytes = nchars;
	for (const CharT* end = chars + nchars; chars < end; chars++) {
	char16_t c = *chars;
	if (c < 0x80)
	continue;
	uint32_t v;
	if (0xD800 <= c && c <= 0xDFFF) {
	/* nbytes sets 1 length since this is surrogate pair. */
	if (c >= 0xDC00 \|\| (chars + 1) == end) {
	nbytes += 2; /* Bad Surrogate */
	continue;
	}
	char16_t c2 = chars[1];
	if (c2 < 0xDC00 \|\| c2 > 0xDFFF) {
	nbytes += 2; /* Bad Surrogate */
	continue;
	}
	v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
	nbytes--;
	chars++;
	} else {
	v = c;
	}
	v >>= 11;
	nbytes++;
	while (v) {
	v >>= 5;
	nbytes++;
	}
	}
	return nbytes;
	}

	JS_PUBLIC_API(size_t)
	JS::GetDeflatedUTF8StringLength(JSFlatString* s)
	{
	JS::AutoCheckCannotGC nogc;
	return s->hasLatin1Chars()
	? ::GetDeflatedUTF8StringLength(s->latin1Chars(nogc), s->length())
	: ::GetDeflatedUTF8StringLength(s->twoByteChars(nogc), s->length());
	}

	static void
	PutUTF8ReplacementCharacter(mozilla::RangedPtr<char>& dst)
	{
	*dst++ = char(0xEF);
	*dst++ = char(0xBF);
	*dst++ = char(0xBD);
	}

	template <typename CharT>
	static void
	DeflateStringToUTF8Buffer(const CharT* src, size_t srclen, mozilla::RangedPtr<char> dst)
	{
	while (srclen) {
	uint32_t v;
	char16_t c = *src++;
	srclen--;
	if (c >= 0xDC00 && c <= 0xDFFF) {
	PutUTF8ReplacementCharacter(dst);
	continue;
	} else if (c < 0xD800 \|\| c > 0xDBFF) {
	v = c;
	} else {
	if (srclen < 1) {
	PutUTF8ReplacementCharacter(dst);
	continue;
	}
	char16_t c2 = *src;
	if ((c2 < 0xDC00) \|\| (c2 > 0xDFFF)) {
	PutUTF8ReplacementCharacter(dst);
	continue;
	}
	src++;
	srclen--;
	v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
	}
	size_t utf8Len;
	if (v < 0x0080) {
	/* no encoding necessary - performance hack */
	*dst++ = char(v);
	utf8Len = 1;
	} else {
	uint8_t utf8buf[4];
	utf8Len = OneUcs4ToUtf8Char(utf8buf, v);
	for (size_t i = 0; i < utf8Len; i++)
	*dst++ = char(utf8buf[i]);
	}
	}
	}

	JS_PUBLIC_API(void)
	JS::DeflateStringToUTF8Buffer(JSFlatString* src, mozilla::RangedPtr<char> dst)
	{
	JS::AutoCheckCannotGC nogc;
	return src->hasLatin1Chars()
	? ::DeflateStringToUTF8Buffer(src->latin1Chars(nogc), src->length(), dst)
	: ::DeflateStringToUTF8Buffer(src->twoByteChars(nogc), src->length(), dst);
	}

	template <typename CharT>
	UTF8CharsZ
	JS::CharsToNewUTF8CharsZ(js::ExclusiveContext* maybeCx, const mozilla::Range<const CharT> chars)
	{
	/* Get required buffer size. */
	const CharT* str = chars.start().get();
	size_t len = ::GetDeflatedUTF8StringLength(str, chars.length());

	/* Allocate buffer. */
	char* utf8;
	if (maybeCx)
	utf8 = maybeCx->pod_malloc<char>(len + 1);
	else
	utf8 = js_pod_malloc<char>(len + 1);
	if (!utf8)
	return UTF8CharsZ();

	/* Encode to UTF8. */
	::DeflateStringToUTF8Buffer(str, chars.length(), mozilla::RangedPtr<char>(utf8, len));
	utf8[len] = '\0';

	return UTF8CharsZ(utf8, len);
	}

	template UTF8CharsZ
	JS::CharsToNewUTF8CharsZ(js::ExclusiveContext* maybeCx,
	const mozilla::Range<const Latin1Char> chars);

	template UTF8CharsZ
	JS::CharsToNewUTF8CharsZ(js::ExclusiveContext* maybeCx,
	const mozilla::Range<const char16_t> chars);

	static const uint32_t INVALID_UTF8 = UINT32_MAX;

	/*
	* Convert a utf8 character sequence into a UCS-4 character and return that
	* character. It is assumed that the caller already checked that the sequence
	* is valid.
	*/
	uint32_t
	JS::Utf8ToOneUcs4Char(const uint8_t* utf8Buffer, int utf8Length)
	{
	MOZ_ASSERT(1 <= utf8Length && utf8Length <= 4);

	if (utf8Length == 1) {
	MOZ_ASSERT(!(*utf8Buffer & 0x80));
	return *utf8Buffer;
	}

	/* from Unicode 3.1, non-shortest form is illegal */
	static const uint32_t minucs4Table[] = { 0x80, 0x800, 0x10000 };

	MOZ_ASSERT((*utf8Buffer & (0x100 - (1 << (7 - utf8Length)))) ==
	(0x100 - (1 << (8 - utf8Length))));
	uint32_t ucs4Char = *utf8Buffer++ & ((1 << (7 - utf8Length)) - 1);
	uint32_t minucs4Char = minucs4Table[utf8Length - 2];
	while (--utf8Length) {
	MOZ_ASSERT((*utf8Buffer & 0xC0) == 0x80);
	ucs4Char = (ucs4Char << 6) \| (*utf8Buffer++ & 0x3F);
	}

	if (MOZ_UNLIKELY(ucs4Char < minucs4Char \|\| (ucs4Char >= 0xD800 && ucs4Char <= 0xDFFF)))
	return INVALID_UTF8;

	return ucs4Char;
	}

	static void
	ReportInvalidCharacter(JSContext* cx, uint32_t offset)
	{
	char buffer[10];
	JS_snprintf(buffer, 10, "%d", offset);
	JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR, GetErrorMessage, nullptr,
	JSMSG_MALFORMED_UTF8_CHAR, buffer);
	}

	static void
	ReportBufferTooSmall(JSContext* cx, uint32_t dummy)
	{
	JS_ReportErrorNumber(cx, GetErrorMessage, nullptr, JSMSG_BUFFER_TOO_SMALL);
	}

	static void
	ReportTooBigCharacter(JSContext* cx, uint32_t v)
	{
	char buffer[10];
	JS_snprintf(buffer, 10, "0x%x", v + 0x10000);
	JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR, GetErrorMessage, nullptr,
	JSMSG_UTF8_CHAR_TOO_LARGE, buffer);
	}

	enum InflateUTF8Action {
	CountAndReportInvalids,
	CountAndIgnoreInvalids,
	Copy
	};

	static const uint32_t REPLACE_UTF8 = 0xFFFD;

	// If making changes to this algorithm, make sure to also update
	// LossyConvertUTF8toUTF16() in dom/wifi/WifiUtils.cpp
	template <InflateUTF8Action action>
	static bool
	InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, char16_t* dst, size_t* dstlenp,
	bool* isAsciip)
	{
	*isAsciip = true;

	// Count how many char16_t characters need to be in the inflated string.
	// \|i\| is the index into \|src\|, and \|j\| is the the index into \|dst\|.
	size_t srclen = src.length();
	uint32_t j = 0;
	for (uint32_t i = 0; i < srclen; i++, j++) {
	uint32_t v = uint32_t(src[i]);
	if (!(v & 0x80)) {
	// ASCII code unit. Simple copy.
	if (action == Copy)
	dst[j] = char16_t(v);

	} else {
	// Non-ASCII code unit. Determine its length in bytes (n).
	*isAsciip = false;
	uint32_t n = 1;
	while (v & (0x80 >> n))
	n++;

	#define INVALID(report, arg, n2) \
	do { \
	if (action == CountAndReportInvalids) { \
	report(cx, arg); \
	return false; \
	} else { \
	if (action == Copy) \
	dst[j] = char16_t(REPLACE_UTF8); \
	else \
	MOZ_ASSERT(action == CountAndIgnoreInvalids); \
	n = n2; \
	goto invalidMultiByteCodeUnit; \
	} \
	} while (0)

	// Check the leading byte.
	if (n < 2 \|\| n > 4)
	INVALID(ReportInvalidCharacter, i, 1);

	// Check that \|src\| is large enough to hold an n-byte code unit.
	if (i + n > srclen)
	INVALID(ReportBufferTooSmall, /* dummy = */ 0, 1);

	// Check the second byte. From Unicode Standard v6.2, Table 3-7
	// Well-Formed UTF-8 Byte Sequences.
	if ((v == 0xE0 && ((uint8_t)src[i + 1] & 0xE0) != 0xA0) \|\| // E0 A0~BF
	(v == 0xED && ((uint8_t)src[i + 1] & 0xE0) != 0x80) \|\| // ED 80~9F
	(v == 0xF0 && ((uint8_t)src[i + 1] & 0xF0) == 0x80) \|\| // F0 90~BF
	(v == 0xF4 && ((uint8_t)src[i + 1] & 0xF0) != 0x80)) // F4 80~8F
	{
	INVALID(ReportInvalidCharacter, i, 1);
	}

	// Check the continuation bytes.
	for (uint32_t m = 1; m < n; m++)
	if ((src[i + m] & 0xC0) != 0x80)
	INVALID(ReportInvalidCharacter, i, m);

	// Determine the code unit's length in char16_t and act accordingly.
	v = JS::Utf8ToOneUcs4Char((uint8_t*)&src[i], n);
	if (v < 0x10000) {
	// The n-byte UTF8 code unit will fit in a single char16_t.
	if (action == Copy)
	dst[j] = char16_t(v);

	} else {
	v -= 0x10000;
	if (v <= 0xFFFFF) {
	// The n-byte UTF8 code unit will fit in two char16_t units.
	if (action == Copy)
	dst[j] = char16_t((v >> 10) + 0xD800);
	j++;
	if (action == Copy)
	dst[j] = char16_t((v & 0x3FF) + 0xDC00);

	} else {
	// The n-byte UTF8 code unit won't fit in two char16_t units.
	INVALID(ReportTooBigCharacter, v, 1);
	}
	}

	invalidMultiByteCodeUnit:
	// Move i to the last byte of the multi-byte code unit; the loop
	// header will do the final i++ to move to the start of the next
	// code unit.
	i += n - 1;
	}
	}

	*dstlenp = j;

	return true;
	}

	typedef bool (CountAction)(JSContext, const UTF8Chars, char16_t, size_t, bool* isAsciip);

	static TwoByteCharsZ
	InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, CountAction countAction, size_t* outlen)
	{
	*outlen = 0;

	bool isAscii;
	if (!countAction(cx, src, /* dst = */ nullptr, outlen, &isAscii))
	return TwoByteCharsZ();

	char16_t* dst = cx->pod_malloc<char16_t>(*outlen + 1); // +1 for NUL
	if (!dst) {
	ReportOutOfMemory(cx);
	return TwoByteCharsZ();
	}

	if (isAscii) {
	size_t srclen = src.length();
	MOZ_ASSERT(*outlen == srclen);
	for (uint32_t i = 0; i < srclen; i++)
	dst[i] = char16_t(src[i]);

	} else {
	JS_ALWAYS_TRUE(InflateUTF8StringToBuffer<Copy>(cx, src, dst, outlen, &isAscii));
	}

	dst[*outlen] = 0; // NUL char

	return TwoByteCharsZ(dst, *outlen);
	}

	TwoByteCharsZ
	JS::UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
	{
	return InflateUTF8StringHelper(cx, utf8, InflateUTF8StringToBuffer<CountAndReportInvalids>,
	outlen);
	}

	TwoByteCharsZ
	JS::LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
	{
	return InflateUTF8StringHelper(cx, utf8, InflateUTF8StringToBuffer<CountAndIgnoreInvalids>,
	outlen);
	}