blob: 899ccefb7c8e5831e7c008fdff4f0ff7409143eb [file] [log] [blame]
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "third_party/libxml/chromium/xml_reader.h"
#include <libxml/xmlreader.h>
#include <vector>
#include "third_party/libxml/chromium/libxml_utils.h"
using internal::XmlStringToStdString;
namespace {
// Converts |xmlstring| exactly like XmlStringToStdString() does and then
// releases |xmlstring| with xmlFree(); the pointer must not be used by the
// caller afterwards.
std::string XmlStringToStdStringWithDelete(xmlChar* xmlstring) {
  // Copy the characters out first; the buffer is gone after xmlFree().
  std::string converted(XmlStringToStdString(xmlstring));
  xmlFree(xmlstring);
  return converted;
}
// Selects what GetNodeAttributeNames() collects for the current node: the
// names of its regular attributes (ATTRIBUTES) or the prefixes of its
// namespace declarations (NAMESPACES_PREFIXES).
enum GetAttributesQueryType { ATTRIBUTES, NAMESPACES_PREFIXES };
// Populates |names| with the names of the attributes or the prefixes of the
// namespace declarations (depending on |query_type|) of the current node in
// |reader|.
// Returns true if the node's attributes were walked and the reader was moved
// back onto the containing element. Returns false if the node has no
// attributes or if libxml reported a failure while moving the reader. Note
// that |names| can legitimately stay empty on success, e.g. when the node only
// carries namespace declarations and |query_type| is ATTRIBUTES.
// Note the strings in |names| are valid as long as |reader| is valid and should
// not be deleted.
bool GetNodeAttributeNames(xmlTextReaderPtr reader,
                           GetAttributesQueryType query_type,
                           std::vector<const xmlChar*>* names) {
  if (xmlTextReaderHasAttributes(reader) <= 0)
    return false;

  // The xmlTextReaderMoveTo*() calls return 1 on success, 0 when the reader
  // did not move and -1 on error. Compare against 1 explicitly: a plain
  // truthiness test would treat the -1 error code as success.
  if (xmlTextReaderMoveToFirstAttribute(reader) != 1)
    return false;

  do {
    const bool is_namespace = xmlTextReaderIsNamespaceDecl(reader) == 1;
    if (query_type == NAMESPACES_PREFIXES && is_namespace) {
      // Use the local name for namespaces so we don't include 'xmlns:'.
      names->push_back(xmlTextReaderConstLocalName(reader));
    } else if (query_type == ATTRIBUTES && !is_namespace) {
      // Use the fully qualified name for attributes.
      names->push_back(xmlTextReaderConstName(reader));
    }
  } while (xmlTextReaderMoveToNextAttribute(reader) > 0);

  // Move the reader from the attributes back to the containing element; the
  // call only counts as successful when libxml confirms the move with 1.
  return xmlTextReaderMoveToElement(reader) == 1;
}
} // namespace
XmlReader::XmlReader() : reader_(nullptr) {}
// Releases the libxml reader, if one was ever created by Load()/LoadFile().
XmlReader::~XmlReader() {
  if (reader_ != nullptr) {
    xmlFreeTextReader(reader_);
  }
}
bool XmlReader::Load(const std::string& input) {
const int kParseOptions = XML_PARSE_NONET; // forbid network access
// TODO(evanm): Verify it's OK to pass nullptr for the URL and encoding.
// The libxml code allows for these, but it's unclear what effect is has.
reader_ = xmlReaderForMemory(input.data(), static_cast<int>(input.size()),
nullptr, nullptr, kParseOptions);
return reader_ != nullptr;
}
bool XmlReader::LoadFile(const std::string& file_path) {
const int kParseOptions = XML_PARSE_NONET; // forbid network access
reader_ = xmlReaderForFile(file_path.c_str(), nullptr, kParseOptions);
return reader_ != nullptr;
}
// Advances the reader to the next node. Returns true only when libxml reports
// status 1 for the read; any other status yields false.
bool XmlReader::Read() {
  const int status = xmlTextReaderRead(reader_);
  return status == 1;
}
// When the reader points at an opening tag, Next() moves it to the node that
// follows the matching closing tag. Returns false on EOF or error.
bool XmlReader::Next() {
  const int status = xmlTextReaderNext(reader_);
  return status == 1;
}
// Reports how deep the current node sits in the document tree, exactly as
// returned by xmlTextReaderDepth().
int XmlReader::Depth() {
  const int depth = xmlTextReaderDepth(reader_);
  return depth;
}
std::string XmlReader::NodeName() {
return XmlStringToStdString(xmlTextReaderConstLocalName(reader_));
}
std::string XmlReader::NodeFullName() {
return XmlStringToStdString(xmlTextReaderConstName(reader_));
}
bool XmlReader::NodeAttribute(const char* name, std::string* out) {
xmlChar* value = xmlTextReaderGetAttribute(reader_, BAD_CAST name);
if (!value)
return false;
*out = XmlStringToStdStringWithDelete(value);
return true;
}
bool XmlReader::GetAllNodeAttributes(
std::map<std::string, std::string>* attributes) {
std::vector<const xmlChar*> attribute_names;
if (!GetNodeAttributeNames(reader_, ATTRIBUTES, &attribute_names))
return false;
// Retrieve the attribute values.
for (const auto* name : attribute_names) {
(*attributes)[XmlStringToStdString(name)] = XmlStringToStdStringWithDelete(
xmlTextReaderGetAttribute(reader_, name));
}
return true;
}
bool XmlReader::GetAllDeclaredNamespaces(
std::map<std::string, std::string>* namespaces) {
std::vector<const xmlChar*> prefixes;
if (!GetNodeAttributeNames(reader_, NAMESPACES_PREFIXES, &prefixes))
return false;
// Retrieve the namespace URIs.
for (const auto* prefix : prefixes) {
bool default_namespace = xmlStrcmp(prefix, BAD_CAST "xmlns") == 0;
std::string value = XmlStringToStdStringWithDelete(
xmlTextReaderLookupNamespace(reader_, prefix));
if (value.empty() && default_namespace) {
// Default namespace is treated as an attribute for some reason.
value = XmlStringToStdStringWithDelete(
xmlTextReaderGetAttribute(reader_, prefix));
}
(*namespaces)[default_namespace ? "" : XmlStringToStdString(prefix)] =
value;
}
return true;
}
// If the current node is a text node, copies its value into |*content| and
// returns true. For any other node type |*content| is left untouched and false
// is returned.
bool XmlReader::GetTextIfTextElement(std::string* content) {
  const bool is_text_node = NodeType() == XML_READER_TYPE_TEXT;
  if (is_text_node)
    *content = XmlStringToStdString(xmlTextReaderConstValue(reader_));
  return is_text_node;
}
// If the current node is a CDATA node, copies its value into |*content| and
// returns true. For any other node type |*content| is left untouched and false
// is returned.
bool XmlReader::GetTextIfCDataElement(std::string* content) {
  const bool is_cdata_node = NodeType() == XML_READER_TYPE_CDATA;
  if (is_cdata_node)
    *content = XmlStringToStdString(xmlTextReaderConstValue(reader_));
  return is_cdata_node;
}
bool XmlReader::IsElement() {
return NodeType() == XML_READER_TYPE_ELEMENT;
}
bool XmlReader::IsClosingElement() {
return NodeType() == XML_READER_TYPE_END_ELEMENT;
}
// Returns true only when libxml positively identifies the current node as an
// empty element.
bool XmlReader::IsEmptyElement() {
  // xmlTextReaderIsEmptyElement() returns 1 for an empty element, 0 otherwise
  // and -1 on error. Compare against 1 so the error code is not implicitly
  // converted to true.
  return xmlTextReaderIsEmptyElement(reader_) == 1;
}
// Reads the content of the element the reader currently points at and advances
// the reader past that element.
// - For an empty tag, |*content| is set to the empty string.
// - Otherwise the values of all nodes up to the closing tag at the starting
//   depth are appended to |*content| (existing text in |*content| is kept).
// Returns false if libxml cannot tell whether the current node is empty or if
// any Read() fails, including the final Read() that steps past the element; in
// those cases |*content| may already have been modified.
bool XmlReader::ReadElementContent(std::string* content) {
  const int start_depth = Depth();

  // xmlTextReaderIsEmptyElement() returns -1 on error; treat that as a failure
  // rather than as "this is an empty tag".
  const int empty_status = xmlTextReaderIsEmptyElement(reader_);
  if (empty_status < 0)
    return false;

  if (empty_status != 0) {
    // Empty tag.  We successfully read the content, but it's
    // empty.
    *content = "";
    // Advance past this empty tag.
    return Read();
  }

  // Advance past opening element tag.
  if (!Read())
    return false;

  // Read the content.  We read up until we hit a closing tag at the
  // same level as our starting point.
  while (NodeType() != XML_READER_TYPE_END_ELEMENT || Depth() != start_depth) {
    *content += XmlStringToStdString(xmlTextReaderConstValue(reader_));
    if (!Read())
      return false;
  }

  // Advance past ending element tag.
  return Read();
}
bool XmlReader::SkipToElement() {
do {
switch (NodeType()) {
case XML_READER_TYPE_ELEMENT:
return true;
case XML_READER_TYPE_END_ELEMENT:
return false;
default:
// Skip all other node types.
continue;
}
} while (Read());
return false;
}
// Returns the libxml node type of the current node, exactly as reported by
// xmlTextReaderNodeType().
int XmlReader::NodeType() {
  const int node_type = xmlTextReaderNodeType(reader_);
  return node_type;
}