// Copyright 2019 The Cobalt Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "cobalt/cssom/serializer.h"
#include "cobalt/cssom/active_pseudo_class.h"
#include "cobalt/cssom/after_pseudo_element.h"
#include "cobalt/cssom/attribute_selector.h"
#include "cobalt/cssom/before_pseudo_element.h"
#include "cobalt/cssom/class_selector.h"
#include "cobalt/cssom/complex_selector.h"
#include "cobalt/cssom/compound_selector.h"
#include "cobalt/cssom/empty_pseudo_class.h"
#include "cobalt/cssom/focus_pseudo_class.h"
#include "cobalt/cssom/hover_pseudo_class.h"
#include "cobalt/cssom/id_selector.h"
#include "cobalt/cssom/not_pseudo_class.h"
#include "cobalt/cssom/simple_selector.h"
#include "cobalt/cssom/type_selector.h"

namespace cobalt {
namespace cssom {

namespace {
// Used to replace an unknown, unrecognized or unrepresentable character.
constexpr int kUnicodeReplacementCharacter = 0xFFFD;
constexpr char kUnicodeReplacementCharacterUtf8[] = u8"\uFFFD";
}  // namespace

Serializer::Serializer(std::string* output) : output_(output) {}

void Serializer::SerializeIdentifier(base::Token identifier) {
  // https://www.w3.org/TR/cssom/#serialize-an-identifier
  //
  // To serialize an identifier means to create a string represented by the
  // concatenation of, for each character of the identifier: For each character
  // of the identifier:
  int char_num = 0;
  uint32_t first_char = 0;
  const uint8_t* next_p = reinterpret_cast<const uint8_t*>(identifier.c_str());
  while (*next_p) {
    uint32_t c;  // code point
    const uint8_t* curr_p = next_p;
    next_p += DecodeUTF8(curr_p, &c);

    char_num++;
    if (char_num == 1) first_char = c;

    // If the character is NULL (U+0000), then the REPLACEMENT CHARACTER
    // (U+FFFD).
    if (c == 0x00) {
      output_->append(kUnicodeReplacementCharacterUtf8);
      continue;
    }

    // If the character is in the range [\1-\1f] (U+0001 to U+001F) or is
    // U+007F, then the character escaped as code point.
    if ((0x01 <= c && c <= 0x1F) || c == 0x7f) {
      EscapeCodePoint(c);
      continue;
    }

    // If the character is the first character and is in the range [0-9] (U+0030
    // to U+0039), then the character escaped as code point.
    bool is_numeric = ('0' <= c && c <= '9');
    if (char_num == 1 && is_numeric) {
      EscapeCodePoint(c);
      continue;
    }

    // If the character is the second character and is in the range [0-9]
    // (U+0030 to U+0039) and the first character is a "-" (U+002D), then the
    // character escaped as code point.
    if (char_num == 2 && is_numeric && first_char == '-') {
      EscapeCodePoint(c);
      continue;
    }

    // If the character is the first character and is a "-" (U+002D), and there
    // is no second character, then the escaped character.
    if (char_num == 1 && c == '-' && *next_p == '\0') {
      EscapeCharacter(c);
      continue;
    }

    // If the character is not handled by one of the above rules and is greater
    // than or equal to U+0080, is "-" (U+002D) or "_" (U+005F), or is in one of
    // the ranges [0-9] (U+0030 to U+0039), [A-Z] (U+0041 to U+005A), or \[a-z]
    // (U+0061 to U+007A), then the character itself.
    if (c >= 0x80 || c == '-' || c == '_' || is_numeric ||
        ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
      do {
        output_->push_back(*curr_p);
      } while (++curr_p < next_p);
      continue;
    }

    // Otherwise, the escaped character.
    EscapeCharacter(c);
  }
}

void Serializer::SerializeString(const std::string& string) {
  // https://www.w3.org/TR/cssom/#serialize-a-string
  //
  // Create a string represented by '"' (U+0022), followed by the result of
  // applying the rules below to each character of the given string, followed by
  // '"' (U+0022):
  output_->push_back('"');

  const uint8_t* p = reinterpret_cast<const uint8_t*>(string.c_str());
  for (size_t n = 0, length = string.length(); n < length; ++n) {
    uint8_t c = p[n];

    // If the character is NULL (U+0000), then the REPLACEMENT CHARACTER
    // (U+FFFD) escaped as code point.
    if (c == 0x00) {
      EscapeCodePoint(kUnicodeReplacementCharacter);
      continue;
    }

    // If the character is in the range [\1-\1f] (U+0001 to U+001F) or is
    // U+007F, the character escaped as code point.
    if ((0x01 <= c && c <= 0x1F) || c == 0x7f) {
      EscapeCodePoint(c);
      continue;
    }

    // If the character is '"' (U+0022) or "\" (U+005C), the escaped character.
    if (c == 0x22 || c == 0x5c) {
      EscapeCharacter(c);
      continue;
    }

    // Otherwise, the character itself.
    output_->push_back(c);
  }

  // ...followed by '"' (U+0022)
  output_->push_back('"');
}

void Serializer::SerializeSelectors(const Selectors& selectors) {
  // https://www.w3.org/TR/cssom/#serializing-selectors
  //
  // To serialize a group of selectors serialize each selector in the group of
  // selectors and then serialize the group.
  for (auto it = selectors.begin(); it != selectors.end(); it++) {
    // https://www.w3.org/TR/cssom/#serialize-a-comma-separated-list
    //
    // To serialize a comma-separated list concatenate all items of the list in
    // list order while separating them by ", ", i.e., COMMA (U+002C) followed
    // by a single SPACE (U+0020).
    if (it != selectors.begin()) {
      output_->append(", ");
    }
    SerializeSelector(**it);
  }
}

void Serializer::SerializeSelector(const Selector& selector) {
  const_cast<Selector*>(&selector)->Accept(this);
}

int Serializer::DecodeUTF8(const uint8_t* p, uint32_t* out_c) {
  DCHECK(p && *p);
  uint32_t c;
  int len;
  if (p[0] < 0x80) {
    *out_c = p[0];
    return 1;
  } else if (p[0] < 0xC0) {
    DLOG(ERROR) << "Bad UTF-8 first byte";
    *out_c = kUnicodeReplacementCharacter;
    return 1;
  } else if (p[0] < 0xE0) {
    c = p[0] & 0x1F;
    len = 2;
  } else if (p[0] < 0xF0) {
    c = p[0] & 0x0F;
    len = 3;
  } else if (p[0] < 0xF8) {
    c = p[0] & 0x07;
    len = 4;
  } else {
    DLOG(ERROR) << "Bad UTF-8 first byte";
    *out_c = kUnicodeReplacementCharacter;
    return 1;
  }
  for (int n = 1; n < len; ++n) {
    if ((p[n] & 0xC0) != 0x80) {
      DLOG(ERROR) << "Bad UTF-8 byte " << n;
      *out_c = kUnicodeReplacementCharacter;
      return n;
    }
    c <<= 6;
    c |= p[n] & 0x3F;
  }
  *out_c = c;
  return len;
}

void Serializer::EscapeCodePoint(uint32_t c) {
  // Highest valid code point in Unicode.
  DCHECK_LE(c, 0x10FFFFu);

  // To escape a character as code point means to create a string of "\"
  // (U+005C), followed by the Unicode code point as the smallest possible
  // number of hexadecimal digits in the range 0-9 a-f (U+0030 to U+0039 and
  // U+0061 to U+0066) to represent the code point in base 16, followed by a
  // single SPACE (U+0020).
  output_->push_back('\\');

  constexpr char kHexDigits[] = "0123456789abcdef";
  char buffer[9];
  int pos = sizeof(buffer);
  buffer[--pos] = '\0';
  while (c != 0) {
    buffer[--pos] = kHexDigits[c & 0x0F];
    c >>= 4;
  }
  if (pos + 1 == sizeof(buffer)) buffer[--pos] = '0';
  output_->append(buffer + pos);

  // ...followed by a single SPACE (U+0020).
  output_->push_back(' ');
}

void Serializer::EscapeCharacter(uint32_t c) {
  // To escape a character means to create a string of "\" (U+005C), followed by
  // the character.
  DCHECK_GE(c, 0x20u);
  DCHECK_LE(c, 0x7Fu);
  output_->push_back('\\');
  output_->push_back(static_cast<char>(c));
}

void Serializer::VisitUniversalSelector(UniversalSelector* universal_selector) {
  UNREFERENCED_PARAMETER(universal_selector);
  // https://www.w3.org/TR/cssom/#serialize-a-selector
  // If this is a universal selector append "*" (U+002A) to s.
  output_->push_back('*');
}

void Serializer::VisitTypeSelector(TypeSelector* type_selector) {
  // https://www.w3.org/TR/cssom/#serialize-a-selector
  // If this is a type selector append the escaped element name to s.
  SerializeIdentifier(type_selector->element_name());
}

void Serializer::VisitAttributeSelector(AttributeSelector* attribute_selector) {
  // https://www.w3.org/TR/cssom/#serialize-a-selector

  // 1. Append "[" (U+005B) to s.
  output_->push_back('[');

  // 2. If the namespace prefix maps to a namespace that is not the null
  // namespace (not in a namespace) append the escaped namespace prefix,
  // followed by a "|" (U+007C) to s.
  // [Cobalt doesn't support @namespace]

  // 3. Append the escaped attribute name to s.
  SerializeIdentifier(attribute_selector->attribute_name());

  // 4. If there is an attribute value specified, append "=", "~=", "|=", "^=",
  // "$=", or "*=" as appropriate (depending on the type of attribute selector),
  // followed by the string escaped attribute value, to s.
  const char* match_op = nullptr;
  switch (attribute_selector->value_match_type()) {
    case AttributeSelector::kNoMatch:
      match_op = nullptr;
      break;
    case AttributeSelector::kEquals:
      match_op = "=";
      break;
    case AttributeSelector::kIncludes:
      match_op = "~=";
      break;
    case AttributeSelector::kDashMatch:
      match_op = "|=";
      break;
    case AttributeSelector::kBeginsWith:
      match_op = "^=";
      break;
    case AttributeSelector::kEndsWith:
      match_op = "$=";
      break;
    case AttributeSelector::kContains:
      match_op = "*=";
      break;
  }
  if (match_op != nullptr) {
    output_->append(match_op);
    SerializeString(attribute_selector->attribute_value());
  }

  // 5. If the attribute selector has the case-sensitivity flag present, append
  // " i" (U+0020 U+0069) to s.
  // [Cobalt doesn't support the CSS4 case-sensitivity attributes.]

  // 6. Append "]" (U+005D) to s.
  output_->push_back(']');
}

void Serializer::VisitClassSelector(ClassSelector* class_selector) {
  // https://www.w3.org/TR/cssom/#serialize-a-selector
  // Append a "." (U+002E), followed by the escaped class name to s.
  output_->push_back('.');
  SerializeIdentifier(class_selector->class_name());
}

void Serializer::VisitIdSelector(IdSelector* id_selector) {
  // https://www.w3.org/TR/cssom/#serialize-a-selector
  // Append a "#" (U+0023), followed by the escaped ID to s.
  output_->push_back('#');
  SerializeIdentifier(id_selector->id());
}

void Serializer::VisitPseudoClass(PseudoClass* pseudo_class) {
  // https://www.w3.org/TR/cssom/#serialize-a-selector
  // If the pseudo-class does not accept arguments append ":" (U+003A),
  // followed by the name of the pseudo-class, to s.
  output_->push_back(':');
  output_->append(pseudo_class->text().c_str());
}

void Serializer::VisitActivePseudoClass(
    ActivePseudoClass* active_pseudo_class) {
  VisitPseudoClass(active_pseudo_class);
}

void Serializer::VisitEmptyPseudoClass(EmptyPseudoClass* empty_pseudo_class) {
  VisitPseudoClass(empty_pseudo_class);
}

void Serializer::VisitFocusPseudoClass(FocusPseudoClass* focus_pseudo_class) {
  VisitPseudoClass(focus_pseudo_class);
}

void Serializer::VisitHoverPseudoClass(HoverPseudoClass* hover_pseudo_class) {
  VisitPseudoClass(hover_pseudo_class);
}

void Serializer::VisitNotPseudoClass(NotPseudoClass* not_pseudo_class) {
  // https://www.w3.org/TR/cssom/#serialize-a-simple-selector
  //
  // Append ":" (U+003A), followed by the name of the pseudo-class, followed by
  // "(" (U+0028), followed by the value of the pseudo-class argument(s)
  // determined as per below, followed by ")" (U+0029), to s.
  // ...
  // :not() - The result of serializing the value using the rules for
  // serializing a group of selectors.
  VisitPseudoClass(not_pseudo_class);
  output_->push_back('(');
  not_pseudo_class->selector()->Accept(this);
  output_->push_back(')');
}

void Serializer::VisitPseudoElement(PseudoElement* pseudo_element) {
  // https://www.w3.org/TR/cssom/#serialize-a-selector
  // If this is the last part of the chain of the selector and there is a
  // pseudo-element, append "::" followed by the name of the pseudo-element,
  // to s.
  output_->append("::");
  output_->append(pseudo_element->text().c_str());
}

void Serializer::VisitAfterPseudoElement(
    AfterPseudoElement* after_pseudo_element) {
  VisitPseudoElement(after_pseudo_element);
}

void Serializer::VisitBeforePseudoElement(
    BeforePseudoElement* before_pseudo_element) {
  VisitPseudoElement(before_pseudo_element);
}

void Serializer::VisitCompoundSelector(CompoundSelector* compound_selector) {
  // https://www.w3.org/TR/cssom/#serialize-a-selector
  //
  // 1. If there is only one simple selector in the compound selectors which is
  // a universal selector, append the result of serializing the universal
  // selector to s.
  if (compound_selector->simple_selectors().size() == 1 &&
      compound_selector->simple_selectors().front()->AsUniversalSelector()) {
    compound_selector->simple_selectors().front()->Accept(this);
    return;
  }

  // 2. Otherwise, for each simple selector in the compound selectors that is
  // not a universal selector of which the namespace prefix maps to a namespace
  // that is not the default namespace serialize the simple selector and append
  // the result to s.
  // [Cobalt doesn't support @namespace]
  for (CompoundSelector::SimpleSelectors::const_iterator iter =
           compound_selector->simple_selectors().begin();
       iter != compound_selector->simple_selectors().end(); ++iter) {
    if ((*iter)->AsUniversalSelector() == NULL) {
      (*iter)->Accept(this);
    }
  }
}

void Serializer::VisitComplexSelector(ComplexSelector* complex_selector) {
  // https://www.w3.org/TR/cssom/#serialize-a-selector
  //
  // If this is not the last part of the chain of the selector append a single
  // SPACE (U+0020), followed by the combinator ">", "+", "~", ">>", "||", as
  // appropriate, followed by another single SPACE (U+0020) if the combinator
  // was not whitespace, to s.
  CompoundSelector* selector = complex_selector->first_selector();
  if (!selector) return;
  selector->Accept(this);
  Combinator* combinator = selector->right_combinator();
  while (combinator) {
    // The |VisitFooCombinator| methods below add the spaces before & after.
    combinator->Accept(this);
    selector = combinator->right_selector();
    selector->Accept(this);
    combinator = selector->right_combinator();
  }
}

void Serializer::VisitChildCombinator(ChildCombinator* child_combinator) {
  UNREFERENCED_PARAMETER(child_combinator);
  output_->append(" > ");
}

void Serializer::VisitNextSiblingCombinator(
    NextSiblingCombinator* next_sibling_combinator) {
  UNREFERENCED_PARAMETER(next_sibling_combinator);
  output_->append(" + ");
}

void Serializer::VisitDescendantCombinator(
    DescendantCombinator* descendant_combinator) {
  UNREFERENCED_PARAMETER(descendant_combinator);
  output_->push_back(' ');
}

void Serializer::VisitFollowingSiblingCombinator(
    FollowingSiblingCombinator* following_sibling_combinator) {
  UNREFERENCED_PARAMETER(following_sibling_combinator);
  output_->append(" ~ ");
}

}  // namespace cssom
}  // namespace cobalt
