blob: d222a353af1091d3328cc62a0666f9e25eb0c9fd [file] [log] [blame]
// Copyright 2015 The Cobalt Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cobalt/dom_parser/html_decoder.h"
#include <memory>
#include "base/callback.h"
#include "base/message_loop/message_loop.h"
#include "base/optional.h"
#include "base/threading/platform_thread.h"
#include "cobalt/dom/attr.h"
#include "cobalt/dom/document.h"
#include "cobalt/dom/dom_stat_tracker.h"
#include "cobalt/dom/element.h"
#include "cobalt/dom/html_collection.h"
#include "cobalt/dom/html_element_context.h"
#include "cobalt/dom/named_node_map.h"
#include "cobalt/dom/testing/stub_css_parser.h"
#include "cobalt/dom/testing/stub_environment_settings.h"
#include "cobalt/dom/testing/stub_script_runner.h"
#include "cobalt/dom/text.h"
#include "cobalt/dom_parser/parser.h"
#include "cobalt/loader/fetcher_factory.h"
#include "cobalt/loader/loader_factory.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace cobalt {
namespace dom_parser {
// Value passed as the DOM max element depth argument to every HTMLDecoder
// constructed by the tests in this file.
const int kDOMMaxElementDepth = 32;
// Mock target for the decoder's load-complete callback. Each test binds
// MockLoadCompleteCallback::Run to the fixture's |mock_load_complete_callback_|
// and hands the bound callback to the HTMLDecoder constructor. None of the
// tests visible in this file set expectations on Run().
class MockLoadCompleteCallback
: public base::Callback<void(const base::Optional<std::string>&)> {
public:
MOCK_METHOD1(Run, void(const base::Optional<std::string>&));
};
// Test fixture that owns everything a test needs to run an HTMLDecoder: stub
// environment settings, CSS parser and script runner, the loader factories,
// an HTMLElementContext, a Document, and an Element created on that document.
//
// Members are initialized in declaration order, and the constructor's
// initializer list passes earlier members to later ones, so do not reorder
// the declarations without checking the constructor.
class HTMLDecoderTest : public ::testing::Test {
protected:
HTMLDecoderTest();
~HTMLDecoderTest() override {}
dom::testing::StubEnvironmentSettings environment_settings_;
base::NullDebuggerHooks null_debugger_hooks_;
loader::FetcherFactory fetcher_factory_;
loader::LoaderFactory loader_factory_;
std::unique_ptr<Parser> dom_parser_;
dom::testing::StubCSSParser stub_css_parser_;
dom::testing::StubScriptRunner stub_script_runner_;
std::unique_ptr<dom::DomStatTracker> dom_stat_tracker_;
dom::HTMLElementContext html_element_context_;
// Document handed to every HTMLDecoder as its first constructor argument.
// Whole-document tests also pass it as the second argument.
scoped_refptr<dom::Document> document_;
// Element created on |document_|. Fragment-style tests pass it as the second
// HTMLDecoder constructor argument and inspect its children afterwards.
scoped_refptr<dom::Element> root_;
// Source location passed to every HTMLDecoder.
base::SourceLocation source_location_;
// Receiver of the load-complete callback bound in each test.
MockLoadCompleteCallback mock_load_complete_callback_;
// Decoder under test; each test (re)creates it with reset().
std::unique_ptr<HTMLDecoder> html_decoder_;
// NOTE(review): declared last, so it is constructed after and destroyed
// before every other member. Confirm that none of the members above needs a
// live MessageLoop during its own construction or destruction.
base::MessageLoop message_loop_;
};
// Wires the fixture together: the factories and stubs are created first, then
// an HTMLElementContext that points at them, then a Document on that context
// and an "element" Element on that document. Dependencies the tests do not
// exercise are passed as NULL.
HTMLDecoderTest::HTMLDecoderTest()
: fetcher_factory_(NULL /* network_module */),
loader_factory_("Test" /* name */, &fetcher_factory_,
NULL /* ResourceProvider */, null_debugger_hooks_,
0 /* encoded_image_cache_capacity */,
base::ThreadPriority::DEFAULT),
dom_parser_(new Parser()),
dom_stat_tracker_(new dom::DomStatTracker("HTMLDecoderTest")),
html_element_context_(
&environment_settings_, &fetcher_factory_, &loader_factory_,
&stub_css_parser_, dom_parser_.get(),
NULL /* can_play_type_handler */, NULL /* web_media_player_factory */,
&stub_script_runner_, NULL /* script_value_factory */, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, dom_stat_tracker_.get(), "",
base::kApplicationStateStarted, NULL),
document_(new dom::Document(&html_element_context_)),
root_(new dom::Element(document_, base::Token("element"))),
source_location_(base::SourceLocation("[object HTMLDecoderTest]", 1, 1)) {
}
// Decoding zero bytes into the document should still yield an <html> root
// element whose first element child is <body>.
TEST_F(HTMLDecoderTest, CanParseEmptyDocument) {
  const std::string markup;
  const auto on_load_complete =
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_));
  html_decoder_.reset(new HTMLDecoder(
      document_, document_, nullptr, kDOMMaxElementDepth, source_location_,
      on_load_complete, true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(markup.data(), markup.size());
  html_decoder_->Finish();

  dom::Element* html = document_->first_element_child();
  ASSERT_TRUE(html);
  EXPECT_EQ("html", html->local_name());

  dom::Element* body = html->first_element_child();
  ASSERT_TRUE(body);
  EXPECT_EQ("body", body->local_name());
}
// TODO: HTMLDecoder currently uses the libxml2 SAX parser, which doesn't
// correctly add the implied tags required by HTML5. Enable the disabled tests
// after switching to a new parser.
// Disabled: a document made of two NUL bytes is expected to produce the
// implied <html> and <body> elements.
TEST_F(HTMLDecoderTest, DISABLED_CanParseDocumentWithOnlyNulls) {
  // Same two zero bytes as before, declared as char so no cast is required.
  const char kNulBytes[] = {'\0', '\0'};
  const auto on_load_complete =
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_));
  html_decoder_.reset(new HTMLDecoder(
      document_, document_, nullptr, kDOMMaxElementDepth, source_location_,
      on_load_complete, true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(kNulBytes, sizeof(kNulBytes));
  html_decoder_->Finish();

  dom::Element* html = document_->first_element_child();
  ASSERT_TRUE(html);
  EXPECT_EQ("html", html->local_name());

  dom::Element* body = html->first_element_child();
  ASSERT_TRUE(body);
  EXPECT_EQ("body", body->local_name());
}
// Disabled: a whitespace-only document is expected to produce the implied
// <html> and <body> elements.
TEST_F(HTMLDecoderTest, DISABLED_CanParseDocumentWithOnlySpaces) {
  const std::string markup(" ");
  const auto on_load_complete =
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_));
  html_decoder_.reset(new HTMLDecoder(
      document_, document_, nullptr, kDOMMaxElementDepth, source_location_,
      on_load_complete, true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(markup.data(), markup.size());
  html_decoder_->Finish();

  dom::Element* html = document_->first_element_child();
  ASSERT_TRUE(html);
  EXPECT_EQ("html", html->local_name());

  dom::Element* body = html->first_element_child();
  ASSERT_TRUE(body);
  EXPECT_EQ("body", body->local_name());
}
// Disabled: a document containing only an <html> start tag is expected to
// produce an <html> element with an implied <body> child.
TEST_F(HTMLDecoderTest, DISABLED_CanParseDocumentWithOnlyHTML) {
  const std::string markup("<html>");
  const auto on_load_complete =
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_));
  html_decoder_.reset(new HTMLDecoder(
      document_, document_, nullptr, kDOMMaxElementDepth, source_location_,
      on_load_complete, true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(markup.data(), markup.size());
  html_decoder_->Finish();

  dom::Element* html = document_->first_element_child();
  ASSERT_TRUE(html);
  EXPECT_EQ("html", html->local_name());

  dom::Element* body = html->first_element_child();
  ASSERT_TRUE(body);
  EXPECT_EQ("body", body->local_name());
}
// A document containing only a <body> start tag should produce an <html> root
// element whose first element child is <body>.
TEST_F(HTMLDecoderTest, CanParseDocumentWithOnlyBody) {
  const std::string markup("<body>");
  const auto on_load_complete =
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_));
  html_decoder_.reset(new HTMLDecoder(
      document_, document_, nullptr, kDOMMaxElementDepth, source_location_,
      on_load_complete, true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(markup.data(), markup.size());
  html_decoder_->Finish();

  dom::Element* html = document_->first_element_child();
  ASSERT_TRUE(html);
  EXPECT_EQ("html", html->local_name());

  dom::Element* body = html->first_element_child();
  ASSERT_TRUE(body);
  EXPECT_EQ("body", body->local_name());
}
// When the decoder's parent node is the document itself, a bare <p></p> should
// end up wrapped as html > body > p, with the <p> having no children.
TEST_F(HTMLDecoderTest, DecodingWholeDocumentShouldAddImpliedTags) {
  const std::string markup("<p></p>");
  const auto on_load_complete =
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_));
  html_decoder_.reset(new HTMLDecoder(
      document_, document_, nullptr, kDOMMaxElementDepth, source_location_,
      on_load_complete, true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(markup.data(), markup.size());
  html_decoder_->Finish();

  dom::Element* html = document_->first_element_child();
  ASSERT_TRUE(html);
  EXPECT_EQ("html", html->local_name());

  dom::Element* body = html->first_element_child();
  ASSERT_TRUE(body);
  EXPECT_EQ("body", body->local_name());

  dom::Element* paragraph = body->first_element_child();
  ASSERT_TRUE(paragraph);
  EXPECT_EQ("p", paragraph->local_name());
  EXPECT_FALSE(paragraph->HasChildNodes());
}
// When the decoder's parent node is |root_| rather than the document, a bare
// <p></p> should become a direct child of |root_| with no implied wrappers.
TEST_F(HTMLDecoderTest, DecodingDocumentFragmentShouldNotAddImpliedTags) {
  const std::string markup("<p></p>");
  const auto on_load_complete =
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_));
  html_decoder_.reset(new HTMLDecoder(
      document_, root_, nullptr, kDOMMaxElementDepth, source_location_,
      on_load_complete, true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(markup.data(), markup.size());
  html_decoder_->Finish();

  dom::Element* paragraph = root_->first_element_child();
  ASSERT_TRUE(paragraph);
  EXPECT_EQ("p", paragraph->local_name());
  EXPECT_FALSE(paragraph->HasChildNodes());
}
// Attributes written without a value ("a", "c", "d") should be present with an
// empty string value; "b=2" should carry the value "2".
TEST_F(HTMLDecoderTest, CanParseAttributesWithAndWithoutValue) {
  const std::string markup("<div a b=2 c d></div>");
  const auto on_load_complete =
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_));
  html_decoder_.reset(new HTMLDecoder(
      document_, root_, nullptr, kDOMMaxElementDepth, source_location_,
      on_load_complete, true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(markup.data(), markup.size());
  html_decoder_->Finish();

  dom::Element* div = root_->first_element_child();
  ASSERT_TRUE(div);
  EXPECT_EQ("div", div->local_name());
  EXPECT_TRUE(div->HasAttributes());
  EXPECT_EQ(4, div->attributes()->length());

  // Attribute "a": present, no value.
  ASSERT_NE(nullptr, div->attributes()->GetNamedItem("a"));
  EXPECT_EQ("a", div->attributes()->GetNamedItem("a")->name());
  EXPECT_EQ("", div->attributes()->GetNamedItem("a")->value());

  // Attribute "b": present, value "2".
  ASSERT_NE(nullptr, div->attributes()->GetNamedItem("b"));
  EXPECT_EQ("b", div->attributes()->GetNamedItem("b")->name());
  EXPECT_EQ("2", div->attributes()->GetNamedItem("b")->value());

  // Attribute "c": present, no value.
  ASSERT_NE(nullptr, div->attributes()->GetNamedItem("c"));
  EXPECT_EQ("c", div->attributes()->GetNamedItem("c")->name());
  EXPECT_EQ("", div->attributes()->GetNamedItem("c")->value());

  // Attribute "d": present, no value.
  ASSERT_NE(nullptr, div->attributes()->GetNamedItem("d"));
  EXPECT_EQ("d", div->attributes()->GetNamedItem("d")->name());
  EXPECT_EQ("", div->attributes()->GetNamedItem("d")->value());
}
// An attribute followed by '=' but no value takes the next token as its value:
// "a= b=2" is expected to yield a="b=2", and the trailing "c=" yields c="".
TEST_F(HTMLDecoderTest, CanParseIncompleteAttributesAssignment) {
  const std::string markup("<div a= b=2 c=></div>");
  const auto on_load_complete =
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_));
  html_decoder_.reset(new HTMLDecoder(
      document_, root_, nullptr, kDOMMaxElementDepth, source_location_,
      on_load_complete, true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(markup.data(), markup.size());
  html_decoder_->Finish();

  dom::Element* div = root_->first_element_child();
  ASSERT_TRUE(div);
  EXPECT_EQ("div", div->local_name());
  EXPECT_TRUE(div->HasAttributes());
  EXPECT_EQ(2, div->attributes()->length());

  // "a" swallows the following "b=2" token as its value.
  ASSERT_NE(nullptr, div->attributes()->GetNamedItem("a"));
  EXPECT_EQ("a", div->attributes()->GetNamedItem("a")->name());
  EXPECT_EQ("b=2", div->attributes()->GetNamedItem("a")->value());

  // "c" has nothing after its '=' and ends up empty.
  ASSERT_NE(nullptr, div->attributes()->GetNamedItem("c"));
  EXPECT_EQ("c", div->attributes()->GetNamedItem("c")->name());
  EXPECT_EQ("", div->attributes()->GetNamedItem("c")->value());
}
// Two consecutive <p> start tags with no end tags should produce two sibling
// <p> elements under |root_| rather than a nested pair.
TEST_F(HTMLDecoderTest, CanParseSelfClosingTags) {
  const std::string markup("<p><p>");
  const auto on_load_complete =
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_));
  html_decoder_.reset(new HTMLDecoder(
      document_, root_, nullptr, kDOMMaxElementDepth, source_location_,
      on_load_complete, true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(markup.data(), markup.size());
  html_decoder_->Finish();

  dom::Element* first_paragraph = root_->first_element_child();
  ASSERT_TRUE(first_paragraph);
  EXPECT_EQ("p", first_paragraph->local_name());

  dom::Element* second_paragraph = first_paragraph->next_element_sibling();
  ASSERT_TRUE(second_paragraph);
  EXPECT_EQ("p", second_paragraph->local_name());
}
// Plain character data inside an element should become a single Text child
// carrying the same characters.
TEST_F(HTMLDecoderTest, CanParseNormalCharacters) {
  const std::string input = "<p>text</p>";
  html_decoder_.reset(new HTMLDecoder(
      document_, root_, nullptr, kDOMMaxElementDepth, source_location_,
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_)),
      true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(input.c_str(), input.length());
  html_decoder_->Finish();

  dom::Element* element = root_->first_element_child();
  ASSERT_TRUE(element);
  EXPECT_EQ("p", element->local_name());

  // Stop here if the <p> has no child at all; calling AsText() on a null
  // first_child() would crash the test binary instead of failing the test.
  ASSERT_TRUE(element->first_child());
  dom::Text* text = element->first_child()->AsText();
  ASSERT_TRUE(text);
  EXPECT_EQ("text", text->data());
}
// Input consisting only of 0xFF bytes should leave |root_| without any child
// nodes.
TEST_F(HTMLDecoderTest, ShouldIgnoreNonUTF8Input) {
  const unsigned char kInvalidBytes[] = {0xff, 0xff};
  const auto on_load_complete =
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_));
  html_decoder_.reset(new HTMLDecoder(
      document_, root_, nullptr, kDOMMaxElementDepth, source_location_,
      on_load_complete, true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(reinterpret_cast<const char*>(kInvalidBytes),
                             sizeof(kInvalidBytes));
  html_decoder_->Finish();

  ASSERT_FALSE(root_->first_child());
}
// Test a decimal and a hex escaped supplementary (not in BMP) character. Both
// numeric character references name the same code point, so both paragraphs
// should contain the same text.
TEST_F(HTMLDecoderTest, CanParseEscapedCharacters) {
  const std::string input = "<p>&#128169;</p><p>&#x1f4a9;</p>";
  html_decoder_.reset(new HTMLDecoder(
      document_, root_, nullptr, kDOMMaxElementDepth, source_location_,
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_)),
      true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(input.c_str(), input.length());
  html_decoder_->Finish();

  // First paragraph: decimal reference.
  dom::Element* element = root_->first_element_child();
  ASSERT_TRUE(element);
  EXPECT_EQ("p", element->local_name());
  // Guard the dereference below: a missing child must fail the test, not
  // crash it.
  ASSERT_TRUE(element->first_child());
  dom::Text* text = element->first_child()->AsText();
  ASSERT_TRUE(text);
  EXPECT_EQ("💩", text->data());

  // Second paragraph: hexadecimal reference.
  element = element->next_element_sibling();
  ASSERT_TRUE(element);
  EXPECT_EQ("p", element->local_name());
  ASSERT_TRUE(element->first_child());
  text = element->first_child()->AsText();
  ASSERT_TRUE(text);
  EXPECT_EQ("💩", text->data());
}
// Test an escaped character without a standard meaning: U+E62B lies in the BMP
// Private Use Area. The reference should be decoded to that code point's UTF-8
// encoding (EE 98 AB) rather than being dropped or replaced.
TEST_F(HTMLDecoderTest, CanParseEscapedInvalidUnicodeCharacters) {
  const std::string input = "<p>&#xe62b;</p>";
  html_decoder_.reset(new HTMLDecoder(
      document_, root_, nullptr, kDOMMaxElementDepth, source_location_,
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_)),
      true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(input.c_str(), input.length());
  html_decoder_->Finish();

  dom::Element* element = root_->first_element_child();
  ASSERT_TRUE(element);
  EXPECT_EQ("p", element->local_name());

  // Guard the dereference below: a missing child must fail the test, not
  // crash it.
  ASSERT_TRUE(element->first_child());
  dom::Text* text = element->first_child()->AsText();
  ASSERT_TRUE(text);
  EXPECT_EQ("\xEE\x98\xAB", text->data());
}
// Test a UTF8 encoded supplementary (not in BMP) character: the raw bytes in
// the input should come back unchanged as the paragraph's text.
TEST_F(HTMLDecoderTest, CanParseUTF8EncodedSupplementaryCharacters) {
  const std::string input = "<p>💩</p>";
  html_decoder_.reset(new HTMLDecoder(
      document_, root_, nullptr, kDOMMaxElementDepth, source_location_,
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_)),
      true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(input.c_str(), input.length());
  html_decoder_->Finish();

  dom::Element* element = root_->first_element_child();
  ASSERT_TRUE(element);
  EXPECT_EQ("p", element->local_name());

  // Guard the dereference below: a missing child must fail the test, not
  // crash it.
  ASSERT_TRUE(element->first_child());
  dom::Text* text = element->first_child()->AsText();
  ASSERT_TRUE(text);
  EXPECT_EQ("💩", text->data());
}
// Feeds the same input to a fresh decoder as two chunks, once for every
// possible split position, and expects the same DOM each time. Some split
// positions fall in the middle of the multi-byte UTF-8 character.
TEST_F(HTMLDecoderTest, CanParseUTF8SplitInChunks) {
  const std::string input = "<p>💩</p>";
  for (size_t first_chunk_size = 0; first_chunk_size < input.length();
       ++first_chunk_size) {
    // Make any failure below report which split position triggered it.
    SCOPED_TRACE(::testing::Message()
                 << "first_chunk_size = " << first_chunk_size);

    // Start every iteration from an empty parent element.
    root_ = new dom::Element(document_, base::Token("element"));
    html_decoder_.reset(new HTMLDecoder(
        document_, root_, nullptr, kDOMMaxElementDepth, source_location_,
        base::Bind(&MockLoadCompleteCallback::Run,
                   base::Unretained(&mock_load_complete_callback_)),
        true, csp::kCSPRequired));

    // This could cut the input in the middle of a UTF8 character.
    html_decoder_->DecodeChunk(input.c_str(), first_chunk_size);
    html_decoder_->DecodeChunk(input.c_str() + first_chunk_size,
                               input.length() - first_chunk_size);
    html_decoder_->Finish();

    dom::Element* element = root_->first_element_child();
    ASSERT_TRUE(element);
    EXPECT_EQ("p", element->local_name());

    // Guard the dereference below: a missing child must fail the test, not
    // crash it.
    ASSERT_TRUE(element->first_child());
    dom::Text* text = element->first_child()->AsText();
    ASSERT_TRUE(text);
    EXPECT_EQ("💩", text->data());
  }
}
// Misnested tags: <b><i></b></i>
// https://www.w3.org/TR/html50/syntax.html#misnested-tags:-b-i-/b-/i
//
// The current version DOES NOT handle the error as outlined in the link above.
// What this test pins down is the structure actually produced: p > b > i.
TEST_F(HTMLDecoderTest, CanParseMisnestedTags1) {
  const std::string markup("<p>1<b>2<i>3</b>4</i>5</p>");
  const auto on_load_complete =
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_));
  html_decoder_.reset(new HTMLDecoder(
      document_, root_, nullptr, kDOMMaxElementDepth, source_location_,
      on_load_complete, true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(markup.data(), markup.size());
  html_decoder_->Finish();

  dom::Element* paragraph = root_->first_element_child();
  ASSERT_TRUE(paragraph);
  EXPECT_EQ("p", paragraph->local_name());

  dom::Element* bold = paragraph->first_element_child();
  ASSERT_TRUE(bold);
  EXPECT_EQ("b", bold->local_name());

  dom::Element* italic = bold->first_element_child();
  ASSERT_TRUE(italic);
  EXPECT_EQ("i", italic->local_name());
}
// Misnested tags: <b><p></b></p>
// https://www.w3.org/TR/html50/syntax.html#misnested-tags:-b-p-/b-/p
//
// The current version DOES NOT handle the error as outlined in the link above.
// What this test pins down is the structure actually produced: <b> followed by
// a sibling <p>.
TEST_F(HTMLDecoderTest, CanParseMisnestedTags2) {
  const std::string markup("<b>1<p>2</b>3</p>");
  const auto on_load_complete =
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_));
  html_decoder_.reset(new HTMLDecoder(
      document_, root_, nullptr, kDOMMaxElementDepth, source_location_,
      on_load_complete, true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(markup.data(), markup.size());
  html_decoder_->Finish();

  dom::Element* bold = root_->first_element_child();
  ASSERT_TRUE(bold);
  EXPECT_EQ("b", bold->local_name());

  dom::Element* paragraph = bold->next_element_sibling();
  ASSERT_TRUE(paragraph);
  EXPECT_EQ("p", paragraph->local_name());
}
// An upper-case tag name in the input should be reported as a lower-case local
// name on the resulting element.
TEST_F(HTMLDecoderTest, TagNamesShouldBeCaseInsensitive) {
  const std::string markup("<DIV></DIV>");
  const auto on_load_complete =
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_));
  html_decoder_.reset(new HTMLDecoder(
      document_, root_, nullptr, kDOMMaxElementDepth, source_location_,
      on_load_complete, true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(markup.data(), markup.size());
  html_decoder_->Finish();

  dom::Element* div = root_->first_element_child();
  ASSERT_TRUE(div);
  EXPECT_EQ("div", div->local_name());
}
// An upper-case attribute name in the input should be reported as a lower-case
// name on the resulting attribute.
TEST_F(HTMLDecoderTest, AttributesShouldBeCaseInsensitive) {
  const std::string input = "<div A></div>";
  html_decoder_.reset(new HTMLDecoder(
      document_, root_, nullptr, kDOMMaxElementDepth, source_location_,
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_)),
      true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(input.c_str(), input.length());
  html_decoder_->Finish();

  dom::Element* element = root_->first_element_child();
  ASSERT_TRUE(element);
  EXPECT_EQ("div", element->local_name());
  EXPECT_TRUE(element->HasAttributes());

  // Fatal on purpose: Item(0) is dereferenced next, so the test must stop here
  // when the attribute was not parsed rather than crash on the access below.
  ASSERT_EQ(1, element->attributes()->length());
  EXPECT_EQ("a", element->attributes()->Item(0)->name());
}
// The <meta> tag declares charset=gb2312 while the body text is stored in this
// source file's own encoding (presumably the mismatch is what provokes the
// libxml decoding error named in the test; confirm against the decoder). The
// test only checks that <html> and its first element child <head> were still
// created.
TEST_F(HTMLDecoderTest, LibxmlDecodingErrorShouldTerminateParsing) {
  const std::string markup(
      "<html>"
      "<head>"
      "<meta content='text/html; charset=gb2312' http-equiv=Content-Type>"
      "</head>"
      "<body>陈绮贞</body>"
      "</html>");
  const auto on_load_complete =
      base::Bind(&MockLoadCompleteCallback::Run,
                 base::Unretained(&mock_load_complete_callback_));
  html_decoder_.reset(new HTMLDecoder(
      document_, document_, nullptr, kDOMMaxElementDepth, source_location_,
      on_load_complete, true, csp::kCSPRequired));
  html_decoder_->DecodeChunk(markup.data(), markup.size());
  html_decoder_->Finish();

  // The fixture's |root_| is repointed at the parsed <html> element.
  root_ = document_->first_element_child();
  ASSERT_TRUE(root_);
  EXPECT_EQ("html", root_->local_name());

  dom::Element* head_element = root_->first_element_child();
  ASSERT_TRUE(head_element);
  EXPECT_EQ("head", head_element->local_name());
}
} // namespace dom_parser
} // namespace cobalt