blob: d7cf50f9d6e6962a5594ed203f8ae95f9944af33 [file] [log] [blame]
// Copyright 2019 The Cobalt Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cobalt/cssom/serializer.h"
#include "cobalt/cssom/active_pseudo_class.h"
#include "cobalt/cssom/after_pseudo_element.h"
#include "cobalt/cssom/attribute_selector.h"
#include "cobalt/cssom/before_pseudo_element.h"
#include "cobalt/cssom/class_selector.h"
#include "cobalt/cssom/complex_selector.h"
#include "cobalt/cssom/compound_selector.h"
#include "cobalt/cssom/empty_pseudo_class.h"
#include "cobalt/cssom/focus_pseudo_class.h"
#include "cobalt/cssom/hover_pseudo_class.h"
#include "cobalt/cssom/id_selector.h"
#include "cobalt/cssom/not_pseudo_class.h"
#include "cobalt/cssom/simple_selector.h"
#include "cobalt/cssom/type_selector.h"
namespace cobalt {
namespace cssom {
namespace {
// Used to replace an unknown, unrecognized or unrepresentable character.
constexpr int kUnicodeReplacementCharacter = 0xFFFD;
constexpr char kUnicodeReplacementCharacterUtf8[] = u8"\uFFFD";
} // namespace
Serializer::Serializer(std::string* output) : output_(output) {}
void Serializer::SerializeIdentifier(base::Token identifier) {
// https://www.w3.org/TR/cssom/#serialize-an-identifier
//
// To serialize an identifier means to create a string represented by the
// concatenation of, for each character of the identifier: For each character
// of the identifier:
int char_num = 0;
uint32_t first_char = 0;
const uint8_t* next_p = reinterpret_cast<const uint8_t*>(identifier.c_str());
while (*next_p) {
uint32_t c; // code point
const uint8_t* curr_p = next_p;
next_p += DecodeUTF8(curr_p, &c);
char_num++;
if (char_num == 1) first_char = c;
// If the character is NULL (U+0000), then the REPLACEMENT CHARACTER
// (U+FFFD).
if (c == 0x00) {
output_->append(kUnicodeReplacementCharacterUtf8);
continue;
}
// If the character is in the range [\1-\1f] (U+0001 to U+001F) or is
// U+007F, then the character escaped as code point.
if ((0x01 <= c && c <= 0x1F) || c == 0x7f) {
EscapeCodePoint(c);
continue;
}
// If the character is the first character and is in the range [0-9] (U+0030
// to U+0039), then the character escaped as code point.
bool is_numeric = ('0' <= c && c <= '9');
if (char_num == 1 && is_numeric) {
EscapeCodePoint(c);
continue;
}
// If the character is the second character and is in the range [0-9]
// (U+0030 to U+0039) and the first character is a "-" (U+002D), then the
// character escaped as code point.
if (char_num == 2 && is_numeric && first_char == '-') {
EscapeCodePoint(c);
continue;
}
// If the character is the first character and is a "-" (U+002D), and there
// is no second character, then the escaped character.
if (char_num == 1 && c == '-' && *next_p == '\0') {
EscapeCharacter(c);
continue;
}
// If the character is not handled by one of the above rules and is greater
// than or equal to U+0080, is "-" (U+002D) or "_" (U+005F), or is in one of
// the ranges [0-9] (U+0030 to U+0039), [A-Z] (U+0041 to U+005A), or \[a-z]
// (U+0061 to U+007A), then the character itself.
if (c >= 0x80 || c == '-' || c == '_' || is_numeric ||
('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
do {
output_->push_back(*curr_p);
} while (++curr_p < next_p);
continue;
}
// Otherwise, the escaped character.
EscapeCharacter(c);
}
}
void Serializer::SerializeString(const std::string& string) {
// https://www.w3.org/TR/cssom/#serialize-a-string
//
// Create a string represented by '"' (U+0022), followed by the result of
// applying the rules below to each character of the given string, followed by
// '"' (U+0022):
output_->push_back('"');
const uint8_t* p = reinterpret_cast<const uint8_t*>(string.c_str());
for (size_t n = 0, length = string.length(); n < length; ++n) {
uint8_t c = p[n];
// If the character is NULL (U+0000), then the REPLACEMENT CHARACTER
// (U+FFFD) escaped as code point.
if (c == 0x00) {
EscapeCodePoint(kUnicodeReplacementCharacter);
continue;
}
// If the character is in the range [\1-\1f] (U+0001 to U+001F) or is
// U+007F, the character escaped as code point.
if ((0x01 <= c && c <= 0x1F) || c == 0x7f) {
EscapeCodePoint(c);
continue;
}
// If the character is '"' (U+0022) or "\" (U+005C), the escaped character.
if (c == 0x22 || c == 0x5c) {
EscapeCharacter(c);
continue;
}
// Otherwise, the character itself.
output_->push_back(c);
}
// ...followed by '"' (U+0022)
output_->push_back('"');
}
void Serializer::SerializeSelectors(const Selectors& selectors) {
// https://www.w3.org/TR/cssom/#serializing-selectors
//
// To serialize a group of selectors serialize each selector in the group of
// selectors and then serialize the group.
for (auto it = selectors.begin(); it != selectors.end(); it++) {
// https://www.w3.org/TR/cssom/#serialize-a-comma-separated-list
//
// To serialize a comma-separated list concatenate all items of the list in
// list order while separating them by ", ", i.e., COMMA (U+002C) followed
// by a single SPACE (U+0020).
if (it != selectors.begin()) {
output_->append(", ");
}
SerializeSelector(**it);
}
}
void Serializer::SerializeSelector(const Selector& selector) {
const_cast<Selector*>(&selector)->Accept(this);
}
int Serializer::DecodeUTF8(const uint8_t* p, uint32_t* out_c) {
DCHECK(p && *p);
uint32_t c;
int len;
if (p[0] < 0x80) {
*out_c = p[0];
return 1;
} else if (p[0] < 0xC0) {
DLOG(ERROR) << "Bad UTF-8 first byte";
*out_c = kUnicodeReplacementCharacter;
return 1;
} else if (p[0] < 0xE0) {
c = p[0] & 0x1F;
len = 2;
} else if (p[0] < 0xF0) {
c = p[0] & 0x0F;
len = 3;
} else if (p[0] < 0xF8) {
c = p[0] & 0x07;
len = 4;
} else {
DLOG(ERROR) << "Bad UTF-8 first byte";
*out_c = kUnicodeReplacementCharacter;
return 1;
}
for (int n = 1; n < len; ++n) {
if ((p[n] & 0xC0) != 0x80) {
DLOG(ERROR) << "Bad UTF-8 byte " << n;
*out_c = kUnicodeReplacementCharacter;
return n;
}
c <<= 6;
c |= p[n] & 0x3F;
}
*out_c = c;
return len;
}
void Serializer::EscapeCodePoint(uint32_t c) {
// Highest valid code point in Unicode.
DCHECK_LE(c, 0x10FFFFu);
// To escape a character as code point means to create a string of "\"
// (U+005C), followed by the Unicode code point as the smallest possible
// number of hexadecimal digits in the range 0-9 a-f (U+0030 to U+0039 and
// U+0061 to U+0066) to represent the code point in base 16, followed by a
// single SPACE (U+0020).
output_->push_back('\\');
constexpr char kHexDigits[] = "0123456789abcdef";
char buffer[9];
int pos = sizeof(buffer);
buffer[--pos] = '\0';
while (c != 0) {
buffer[--pos] = kHexDigits[c & 0x0F];
c >>= 4;
}
if (pos + 1 == sizeof(buffer)) buffer[--pos] = '0';
output_->append(buffer + pos);
// ...followed by a single SPACE (U+0020).
output_->push_back(' ');
}
void Serializer::EscapeCharacter(uint32_t c) {
// To escape a character means to create a string of "\" (U+005C), followed by
// the character.
DCHECK_GE(c, 0x20u);
DCHECK_LE(c, 0x7Fu);
output_->push_back('\\');
output_->push_back(static_cast<char>(c));
}
void Serializer::VisitUniversalSelector(UniversalSelector* universal_selector) {
// https://www.w3.org/TR/cssom/#serialize-a-selector
// If this is a universal selector append "*" (U+002A) to s.
output_->push_back('*');
}
void Serializer::VisitTypeSelector(TypeSelector* type_selector) {
// https://www.w3.org/TR/cssom/#serialize-a-selector
// If this is a type selector append the escaped element name to s.
SerializeIdentifier(type_selector->element_name());
}
void Serializer::VisitAttributeSelector(AttributeSelector* attribute_selector) {
// https://www.w3.org/TR/cssom/#serialize-a-selector
// 1. Append "[" (U+005B) to s.
output_->push_back('[');
// 2. If the namespace prefix maps to a namespace that is not the null
// namespace (not in a namespace) append the escaped namespace prefix,
// followed by a "|" (U+007C) to s.
// [Cobalt doesn't support @namespace]
// 3. Append the escaped attribute name to s.
SerializeIdentifier(attribute_selector->attribute_name());
// 4. If there is an attribute value specified, append "=", "~=", "|=", "^=",
// "$=", or "*=" as appropriate (depending on the type of attribute selector),
// followed by the string escaped attribute value, to s.
const char* match_op = nullptr;
switch (attribute_selector->value_match_type()) {
case AttributeSelector::kNoMatch:
match_op = nullptr;
break;
case AttributeSelector::kEquals:
match_op = "=";
break;
case AttributeSelector::kIncludes:
match_op = "~=";
break;
case AttributeSelector::kDashMatch:
match_op = "|=";
break;
case AttributeSelector::kBeginsWith:
match_op = "^=";
break;
case AttributeSelector::kEndsWith:
match_op = "$=";
break;
case AttributeSelector::kContains:
match_op = "*=";
break;
}
if (match_op != nullptr) {
output_->append(match_op);
SerializeString(attribute_selector->attribute_value());
}
// 5. If the attribute selector has the case-sensitivity flag present, append
// " i" (U+0020 U+0069) to s.
// [Cobalt doesn't support the CSS4 case-sensitivity attributes.]
// 6. Append "]" (U+005D) to s.
output_->push_back(']');
}
void Serializer::VisitClassSelector(ClassSelector* class_selector) {
// https://www.w3.org/TR/cssom/#serialize-a-selector
// Append a "." (U+002E), followed by the escaped class name to s.
output_->push_back('.');
SerializeIdentifier(class_selector->class_name());
}
void Serializer::VisitIdSelector(IdSelector* id_selector) {
// https://www.w3.org/TR/cssom/#serialize-a-selector
// Append a "#" (U+0023), followed by the escaped ID to s.
output_->push_back('#');
SerializeIdentifier(id_selector->id());
}
void Serializer::VisitPseudoClass(PseudoClass* pseudo_class) {
// https://www.w3.org/TR/cssom/#serialize-a-selector
// If the pseudo-class does not accept arguments append ":" (U+003A),
// followed by the name of the pseudo-class, to s.
output_->push_back(':');
output_->append(pseudo_class->text().c_str());
}
void Serializer::VisitActivePseudoClass(
ActivePseudoClass* active_pseudo_class) {
VisitPseudoClass(active_pseudo_class);
}
void Serializer::VisitEmptyPseudoClass(EmptyPseudoClass* empty_pseudo_class) {
VisitPseudoClass(empty_pseudo_class);
}
void Serializer::VisitFocusPseudoClass(FocusPseudoClass* focus_pseudo_class) {
VisitPseudoClass(focus_pseudo_class);
}
void Serializer::VisitHoverPseudoClass(HoverPseudoClass* hover_pseudo_class) {
VisitPseudoClass(hover_pseudo_class);
}
void Serializer::VisitNotPseudoClass(NotPseudoClass* not_pseudo_class) {
// https://www.w3.org/TR/cssom/#serialize-a-simple-selector
//
// Append ":" (U+003A), followed by the name of the pseudo-class, followed by
// "(" (U+0028), followed by the value of the pseudo-class argument(s)
// determined as per below, followed by ")" (U+0029), to s.
// ...
// :not() - The result of serializing the value using the rules for
// serializing a group of selectors.
VisitPseudoClass(not_pseudo_class);
output_->push_back('(');
not_pseudo_class->selector()->Accept(this);
output_->push_back(')');
}
void Serializer::VisitPseudoElement(PseudoElement* pseudo_element) {
// https://www.w3.org/TR/cssom/#serialize-a-selector
// If this is the last part of the chain of the selector and there is a
// pseudo-element, append "::" followed by the name of the pseudo-element,
// to s.
output_->append("::");
output_->append(pseudo_element->text().c_str());
}
void Serializer::VisitAfterPseudoElement(
AfterPseudoElement* after_pseudo_element) {
VisitPseudoElement(after_pseudo_element);
}
void Serializer::VisitBeforePseudoElement(
BeforePseudoElement* before_pseudo_element) {
VisitPseudoElement(before_pseudo_element);
}
void Serializer::VisitCompoundSelector(CompoundSelector* compound_selector) {
// https://www.w3.org/TR/cssom/#serialize-a-selector
//
// 1. If there is only one simple selector in the compound selectors which is
// a universal selector, append the result of serializing the universal
// selector to s.
if (compound_selector->simple_selectors().size() == 1 &&
compound_selector->simple_selectors().front()->AsUniversalSelector()) {
compound_selector->simple_selectors().front()->Accept(this);
return;
}
// 2. Otherwise, for each simple selector in the compound selectors that is
// not a universal selector of which the namespace prefix maps to a namespace
// that is not the default namespace serialize the simple selector and append
// the result to s.
// [Cobalt doesn't support @namespace]
for (CompoundSelector::SimpleSelectors::const_iterator iter =
compound_selector->simple_selectors().begin();
iter != compound_selector->simple_selectors().end(); ++iter) {
if ((*iter)->AsUniversalSelector() == NULL) {
(*iter)->Accept(this);
}
}
}
void Serializer::VisitComplexSelector(ComplexSelector* complex_selector) {
// https://www.w3.org/TR/cssom/#serialize-a-selector
//
// If this is not the last part of the chain of the selector append a single
// SPACE (U+0020), followed by the combinator ">", "+", "~", ">>", "||", as
// appropriate, followed by another single SPACE (U+0020) if the combinator
// was not whitespace, to s.
CompoundSelector* selector = complex_selector->first_selector();
if (!selector) return;
selector->Accept(this);
Combinator* combinator = selector->right_combinator();
while (combinator) {
// The |VisitFooCombinator| methods below add the spaces before & after.
combinator->Accept(this);
selector = combinator->right_selector();
selector->Accept(this);
combinator = selector->right_combinator();
}
}
void Serializer::VisitChildCombinator(ChildCombinator* child_combinator) {
output_->append(" > ");
}
void Serializer::VisitNextSiblingCombinator(
NextSiblingCombinator* next_sibling_combinator) {
output_->append(" + ");
}
void Serializer::VisitDescendantCombinator(
DescendantCombinator* descendant_combinator) {
output_->push_back(' ');
}
void Serializer::VisitFollowingSiblingCombinator(
FollowingSiblingCombinator* following_sibling_combinator) {
output_->append(" ~ ");
}
} // namespace cssom
} // namespace cobalt