blob: 282a8d7df704146eacf6f632201b4b37b77d3431 [file] [log] [blame]
// Copyright 2021 The Cobalt Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cobalt/encoding/text_decoder.h"
#include <string>
#include <utility>
#include <vector>
#include "cobalt/base/polymorphic_downcast.h"
#include "cobalt/dom/testing/stub_window.h"
#include "cobalt/script/array_buffer_view.h"
#include "cobalt/script/environment_settings.h"
#include "cobalt/script/testing/mock_exception_state.h"
#include "cobalt/script/typed_arrays.h"
#include "testing/gtest/include/gtest/gtest.h"
using ::testing::_;
namespace cobalt {
namespace encoding {
namespace {
//////////////////////////////////////////////////////////////////////////
// TextDecoderTest
//////////////////////////////////////////////////////////////////////////
// Test fixture shared by the TextDecoder test cases in this file.
//
// It owns the two collaborators the tests rely on:
//  - |stub_window_|: its global_environment() is passed to
//    script::ArrayBuffer::New() to allocate the input buffers.
//  - |exception_state_|: a mock on which the tests place expectations for
//    SetSimpleExceptionVA() calls.
class TextDecoderTest : public ::testing::Test {
 protected:
  TextDecoderTest();
  // Overrides the virtual destructor inherited from ::testing::Test.
  ~TextDecoderTest() override;

  cobalt::dom::testing::StubWindow stub_window_;
  script::testing::MockExceptionState exception_state_;
};
// Out-of-line constructor; the members are default-constructed.
TextDecoderTest::TextDecoderTest() = default;
// Out-of-line destructor; nothing beyond member destruction is required.
TextDecoderTest::~TextDecoderTest() = default;
//////////////////////////////////////////////////////////////////////////
// Test cases
//////////////////////////////////////////////////////////////////////////
// Checks the value of encoding() after construction with no label, with
// recognized labels, and with an unrecognized label. Only the unrecognized
// label is expected to report a RangeError through the exception state.
TEST_F(TextDecoderTest, Constructors) {
  {
    // No label: encoding() reports "utf-8".
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    scoped_refptr<TextDecoder> decoder = new TextDecoder(&exception_state_);
    EXPECT_EQ("utf-8", decoder->encoding());
  }
  {
    // The "utf-16" label is reported back as "utf-16le".
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    scoped_refptr<TextDecoder> decoder =
        new TextDecoder("utf-16", &exception_state_);
    EXPECT_EQ("utf-16le", decoder->encoding());
  }
  {
    // An explicit big-endian label is reported unchanged.
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    scoped_refptr<TextDecoder> decoder =
        new TextDecoder("utf-16be", &exception_state_);
    EXPECT_EQ("utf-16be", decoder->encoding());
  }
  {
    // An unrecognized label reports a RangeError and leaves encoding() empty.
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _));
    scoped_refptr<TextDecoder> decoder =
        new TextDecoder("foo-encoding", &exception_state_);
    EXPECT_EQ("", decoder->encoding());
  }
}
// Decodes complete UTF-8 byte sequences with a TextDecoder constructed without
// a label and checks the returned string for every table entry. No RangeError
// is expected at any point.
TEST_F(TextDecoderTest, DecodeUTF8) {
  EXPECT_CALL(exception_state_, SetSimpleExceptionVA(script::kRangeError, _, _))
      .Times(0);
  scoped_refptr<TextDecoder> text_decoder = new TextDecoder(&exception_state_);
  // Each entry is {input bytes (decimal), expected decoded string}.
  const std::vector<std::pair<std::vector<uint8>, std::string>> tests = {
      {{72, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100, 33},
       "Hello world!"},
      // The input ends with 239, 188, 129 (EF BC 81), i.e. U+FF01 FULLWIDTH
      // EXCLAMATION MARK rather than ASCII '!'. The expectation spells those
      // bytes out explicitly so it cannot silently drift from the input.
      {{72,  101, 106, 33,  32,  208, 159, 209, 128, 208, 184, 208, 178, 208,
        181, 209, 130, 33,  32,  228, 189, 160, 229, 165, 189, 239, 188, 129},
       "Hej! Привет! 你好\xEF\xBC\x81"},
      // 33 is ASCII '!'. It is written in decimal like every other byte in
      // this table (an octal literal such as 041 has the same value but is
      // easy to misread as forty-one).
      {{208, 148, 208, 176, 33}, "Да!"},
  };
  for (const auto& test : tests) {
    // Mutable copy: the buffer is created from the vector's data() pointer.
    std::vector<uint8> data = test.first;
    const std::string& want = test.second;
    script::Handle<script::ArrayBuffer> array_buffer = script::ArrayBuffer::New(
        stub_window_.global_environment(), data.data(), data.size());
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    std::string got = text_decoder->Decode(dom::BufferSource(array_buffer),
                                           &exception_state_);
    EXPECT_EQ(got, want);
  }
}
// Decodes UTF-8 inputs containing EF BF BD sequences and one four-byte
// sequence, and checks that each comes back byte-for-byte as listed.
// The per-entry labels name the scenario each row stands for.
TEST_F(TextDecoderTest, DecodeUTF8Surrogates) {
  EXPECT_CALL(exception_state_, SetSimpleExceptionVA(script::kRangeError, _, _))
      .Times(0);
  scoped_refptr<TextDecoder> decoder = new TextDecoder(&exception_state_);
  // Each entry is {input bytes, expected decoded string}.
  const std::vector<std::pair<std::vector<uint8>, std::string>> cases = {
      // Sanity check
      {{0x61, 0x62, 0x63, 0x31, 0x32, 0x33}, "abc123"},
      // Surrogate half (low)
      {{0xef, 0xbf, 0xbd}, "\xEF\xBF\xBD"},
      // Surrogate half (high). The literal is split so that "123" is not
      // parsed as a continuation of the preceding hex escape.
      {{0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33},
       "abc\xEF\xBF\xBD"
       "123"},
      // Wrong order
      {{0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd}, "\xEF\xBF\xBD\xEF\xBF\xBD"},
      // Right order
      {{0xf0, 0x90, 0x80, 0x80}, "𐀀"},
  };
  for (const auto& entry : cases) {
    // Mutable copy: the buffer is created from the vector's data() pointer.
    std::vector<uint8> bytes = entry.first;
    const std::string& expected = entry.second;
    script::Handle<script::ArrayBuffer> buffer = script::ArrayBuffer::New(
        stub_window_.global_environment(), bytes.data(), bytes.size());
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    std::string decoded =
        decoder->Decode(dom::BufferSource(buffer), &exception_state_);
    EXPECT_EQ(decoded, expected);
  }
}
// Feeds malformed byte sequences to decoders created with the |fatal| option
// set. For every entry, construction must not report a RangeError, Decode()
// must report exactly one TypeError, and the returned string must be empty.
// Despite the test name, the final entry exercises "utf-16le".
TEST_F(TextDecoderTest, DecodeUTF8Fatal) {
  // Each entry is {encoding label, malformed input bytes}.
  const std::vector<std::pair<std::string, std::vector<uint8>>> cases = {
      {"utf-8", {0xFF}},                    // Invalid code
      {"utf-8", {0xC0}},                    // Ends early
      {"utf-8", {0xE0}},                    // Ends early (2)
      {"utf-8", {0xC0, 0x00}},              // Invalid trail
      {"utf-8", {0xC0, 0xC0}},              // Invalid trail (2)
      {"utf-8", {0xE0, 0x00}},              // Invalid trail (3)
      {"utf-8", {0xE0, 0xC0}},              // Invalid trail (4)
      {"utf-8", {0xE0, 0x80, 0x00}},        // Invalid trail (5)
      {"utf-8", {0xE0, 0x80, 0xC0}},        // Invalid trail (6)
      // > 0x10FFFF
      {"utf-8", {0xFC, 0x80, 0x80, 0x80, 0x80, 0x80}},
      // Obsolete lead byte
      {"utf-8", {0xFE, 0x80, 0x80, 0x80, 0x80, 0x80}},
      // Overlong U+0000 - 2 through 6 bytes
      {"utf-8", {0xC0, 0x80}},
      {"utf-8", {0xE0, 0x80, 0x80}},
      {"utf-8", {0xF0, 0x80, 0x80, 0x80}},
      {"utf-8", {0xF8, 0x80, 0x80, 0x80, 0x80}},
      {"utf-8", {0xFC, 0x80, 0x80, 0x80, 0x80, 0x80}},
      // Overlong U+007F - 2 through 6 bytes
      {"utf-8", {0xC1, 0xBF}},
      {"utf-8", {0xE0, 0x81, 0xBF}},
      {"utf-8", {0xF0, 0x80, 0x81, 0xBF}},
      {"utf-8", {0xF8, 0x80, 0x80, 0x81, 0xBF}},
      {"utf-8", {0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF}},
      // Overlong U+07FF - 3 through 6 bytes
      {"utf-8", {0xE0, 0x9F, 0xBF}},
      {"utf-8", {0xF0, 0x80, 0x9F, 0xBF}},
      {"utf-8", {0xF8, 0x80, 0x80, 0x9F, 0xBF}},
      {"utf-8", {0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF}},
      // Overlong U+FFFF - 4 through 6 bytes
      {"utf-8", {0xF0, 0x8F, 0xBF, 0xBF}},
      {"utf-8", {0xF8, 0x80, 0x8F, 0xBF, 0xBF}},
      {"utf-8", {0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF}},
      // Overlong U+10FFFF - 5 and 6 bytes
      {"utf-8", {0xF8, 0x84, 0x8F, 0xBF, 0xBF}},
      {"utf-8", {0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF}},
      {"utf-8", {0xED, 0xA0, 0x80}},                    // Lead surrogate
      {"utf-8", {0xED, 0xB0, 0x80}},                    // Trail surrogate
      {"utf-8", {0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80}},  // Surrogate pair
      {"utf-16le", {0x00}},                             // Truncated code unit
  };
  // A failed decode is expected to hand back an empty string.
  const std::string expected;
  TextDecoderOptions fatal_options;
  fatal_options.set_fatal(true);
  for (const auto& entry : cases) {
    const std::string& encoding_label = entry.first;
    // Mutable copy: the buffer is created from the vector's data() pointer.
    std::vector<uint8> bytes = entry.second;
    // Constructing the decoder itself must not report an error.
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    scoped_refptr<TextDecoder> decoder =
        new TextDecoder(encoding_label, fatal_options, &exception_state_);
    script::Handle<script::ArrayBuffer> buffer = script::ArrayBuffer::New(
        stub_window_.global_environment(), bytes.data(), bytes.size());
    // Malformed input must raise an exception when fatal is set to true.
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kTypeError, _, _))
        .Times(1);
    std::string decoded =
        decoder->Decode(dom::BufferSource(buffer), &exception_state_);
    EXPECT_EQ(decoded, expected);
  }
}
// Checks how a leading byte order mark is reflected in the decoded string for
// three decoder configurations: ignore_bom set to true (BOM kept in the
// output), ignore_bom set to false (BOM dropped), and no options at all (BOM
// dropped).
TEST_F(TextDecoderTest, DecodeIgnoreBOM) {
  // Each entry is {encoding label, BOM followed by "abc" in that encoding}.
  std::vector<std::pair<std::string, std::vector<uint8>>> cases = {
      {"utf-8", {0xEF, 0xBB, 0xBF, 0x61, 0x62, 0x63}},
      {"utf-16le", {0xFF, 0xFE, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00}},
      {"utf-16be", {0xFE, 0xFF, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63}},
  };
  // u8 prefix needed for compilation on Windows.
  const std::string kBOM = u8"\uFEFF";
  const std::string kABC = "abc";

  // Wraps |bytes| in an ArrayBuffer, decodes it in a single call through
  // |decoder| (no RangeError expected) and compares against |expected|.
  auto expect_decodes_to = [this](const scoped_refptr<TextDecoder>& decoder,
                                  std::vector<uint8>* bytes,
                                  const std::string& expected) {
    script::Handle<script::ArrayBuffer> buffer = script::ArrayBuffer::New(
        stub_window_.global_environment(), bytes->data(), bytes->size());
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    std::string decoded =
        decoder->Decode(dom::BufferSource(buffer), &exception_state_);
    EXPECT_EQ(decoded, expected);
  };

  TextDecoderOptions options;
  for (auto& entry : cases) {
    const std::string& encoding_label = entry.first;
    std::vector<uint8>& bytes = entry.second;
    {  // BOM should be present in decoded string if ignored.
      options.set_ignore_bom(true);
      EXPECT_CALL(exception_state_,
                  SetSimpleExceptionVA(script::kRangeError, _, _))
          .Times(0);
      scoped_refptr<TextDecoder> decoder =
          new TextDecoder(encoding_label, options, &exception_state_);
      expect_decodes_to(decoder, &bytes, kBOM + kABC);
    }
    {  // BOM should be absent from decoded string if not ignored.
      options.set_ignore_bom(false);
      EXPECT_CALL(exception_state_,
                  SetSimpleExceptionVA(script::kRangeError, _, _))
          .Times(0);
      scoped_refptr<TextDecoder> decoder =
          new TextDecoder(encoding_label, options, &exception_state_);
      expect_decodes_to(decoder, &bytes, kABC);
    }
    {  // BOM should be absent from decoded string by default.
      EXPECT_CALL(exception_state_,
                  SetSimpleExceptionVA(script::kRangeError, _, _))
          .Times(0);
      scoped_refptr<TextDecoder> decoder =
          new TextDecoder(encoding_label, &exception_state_);
      expect_decodes_to(decoder, &bytes, kABC);
    }
  }
}
// Decodes the same text from UTF-8, UTF-16BE and UTF-16LE input by passing it
// to Decode() in fixed-size chunks with the |stream| option set, followed by a
// final Decode() call without input. The concatenated output must equal the
// full text and no RangeError may be reported.
TEST_F(TextDecoderTest, DecodeUTFStreamSimple) {
  const std::string expected = "abc123Да!𐀀";
  // Each entry is {encoding label, encoded bytes of the expected text}.
  const std::vector<std::pair<std::string, std::vector<uint8>>> cases = {
      {
          "utf-8",
          {0x61, 0x62, 0x63, 0x31, 0x32, 0x33, 0xd0, 0x94, 0xd0, 0xb0, 0x21,
           0xf0, 0x90, 0x80, 0x80},
      },
      {
          "utf-16be",
          {0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x31, 0x00, 0x32, 0x00,
           0x33, 0x04, 0x14, 0x04, 0x30, 0x00, 0x21, 0xd8, 0x00, 0xdc, 0x00},
      },
      {
          "utf-16le",
          {0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33,
           0x00, 0x14, 0x04, 0x30, 0x04, 0x21, 0x00, 0x00, 0xd8, 0x00, 0xdc},
      },
  };
  TextDecodeOptions streaming;
  streaming.set_stream(true);
  // NOTE(review): the bounds make this loop run for chunk_size == 1 only;
  // confirm whether larger chunk sizes were meant to be covered as well.
  for (int chunk_size = 1; chunk_size <= 1; ++chunk_size) {
    for (const auto& entry : cases) {
      const std::string& encoding_label = entry.first;
      const std::vector<uint8>& bytes = entry.second;
      EXPECT_CALL(exception_state_,
                  SetSimpleExceptionVA(script::kRangeError, _, _))
          .Times(0);
      scoped_refptr<TextDecoder> decoder =
          new TextDecoder(encoding_label, &exception_state_);
      std::string decoded;
      for (std::size_t begin = 0; begin < bytes.size(); begin += chunk_size) {
        // Clamp the end of the final chunk to the end of the input.
        std::size_t end = begin + chunk_size;
        if (end >= bytes.size()) {
          end = bytes.size();
        }
        std::vector<uint8> piece(bytes.begin() + begin, bytes.begin() + end);
        script::Handle<script::ArrayBuffer> buffer = script::ArrayBuffer::New(
            stub_window_.global_environment(), piece.data(), piece.size());
        EXPECT_CALL(exception_state_,
                    SetSimpleExceptionVA(script::kRangeError, _, _))
            .Times(0);
        // Decode with {stream: true}.
        decoded += decoder->Decode(dom::BufferSource(buffer), streaming,
                                   &exception_state_);
      }
      EXPECT_CALL(exception_state_,
                  SetSimpleExceptionVA(script::kRangeError, _, _))
          .Times(0);
      // Final call without input; whatever it returns is appended too.
      decoded += decoder->Decode(&exception_state_);
      EXPECT_EQ(decoded, expected);
    }
  }
}
// Decodes little-endian UTF-16 code units with a decoder created from the
// "utf-16" label and checks the returned string. No RangeError is expected.
TEST_F(TextDecoderTest, DecodeUTF16) {
  EXPECT_CALL(exception_state_, SetSimpleExceptionVA(script::kRangeError, _, _))
      .Times(0);
  scoped_refptr<TextDecoder> decoder =
      new TextDecoder("utf-16", &exception_state_);
  // Each entry is {input bytes, expected decoded string}.
  const std::vector<std::pair<std::vector<uint8>, std::string>> cases = {
      {{0x14, 0x04, 0x30, 0x04, 0x21, 0x00}, "Да!"},
  };
  for (const auto& entry : cases) {
    // Mutable copy: the buffer is created from the vector's data() pointer.
    std::vector<uint8> bytes = entry.first;
    const std::string& expected = entry.second;
    script::Handle<script::ArrayBuffer> buffer = script::ArrayBuffer::New(
        stub_window_.global_environment(), bytes.data(), bytes.size());
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    std::string decoded =
        decoder->Decode(dom::BufferSource(buffer), &exception_state_);
    EXPECT_EQ(decoded, expected);
  }
}
// Decodes big-endian UTF-16 code units with a decoder created from the
// "utf-16be" label and checks the returned string. No RangeError is expected.
TEST_F(TextDecoderTest, DecodeUTF16BE) {
  EXPECT_CALL(exception_state_, SetSimpleExceptionVA(script::kRangeError, _, _))
      .Times(0);
  scoped_refptr<TextDecoder> decoder =
      new TextDecoder("utf-16be", &exception_state_);
  // Each entry is {input bytes, expected decoded string}.
  const std::vector<std::pair<std::vector<uint8>, std::string>> cases = {
      {{0x04, 0x14, 0x04, 0x30, 0x00, 0x21}, "Да!"},
  };
  for (const auto& entry : cases) {
    // Mutable copy: the buffer is created from the vector's data() pointer.
    std::vector<uint8> bytes = entry.first;
    const std::string& expected = entry.second;
    script::Handle<script::ArrayBuffer> buffer = script::ArrayBuffer::New(
        stub_window_.global_environment(), bytes.data(), bytes.size());
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    std::string decoded =
        decoder->Decode(dom::BufferSource(buffer), &exception_state_);
    EXPECT_EQ(decoded, expected);
  }
}
} // namespace
} // namespace encoding
} // namespace cobalt