blob: 1082ff22cc35dac5f252e4234a71051b50e1074e [file] [log] [blame]
/*
* parser.c : an XML 1.0 parser, namespaces and validity support are mostly
* implemented on top of the SAX interfaces
*
* References:
* The XML specification:
* http://www.w3.org/TR/REC-xml
* Original 1.0 version:
* http://www.w3.org/TR/1998/REC-xml-19980210
* XML second edition working draft
* http://www.w3.org/TR/2000/WD-xml-2e-20000814
*
* Okay this is a big file, the parser core is around 7000 lines, then it
* is followed by the progressive parser top routines, then the various
* high level APIs to call the parser and a few miscellaneous functions.
* A number of helper functions and deprecated ones have been moved to
* parserInternals.c to reduce this file size.
* As much as possible the functions are associated with their relative
* production in the XML specification. A few productions defining the
* different ranges of character are actually implanted either in
* parserInternals.h or parserInternals.c
* The DOM tree build is realized from the default SAX callbacks in
* the module SAX.c.
* The routines doing the validation checks are in valid.c and called either
* from the SAX callbacks or as standalone functions using a preparsed
* document.
*
* See Copyright for the status of this software.
*
* daniel@veillard.com
*/
#define IN_LIBXML
#include "libxml.h"
#if defined(WIN32) && !defined (__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_LIMITS_H
#include <limits.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include <stdarg.h>
#include <libxml/xmlmemory.h>
#include <libxml/threads.h>
#include <libxml/globals.h>
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include <libxml/valid.h>
#include <libxml/entities.h>
#include <libxml/xmlerror.h>
#include <libxml/encoding.h>
#include <libxml/xmlIO.h>
#include <libxml/uri.h>
#ifdef LIBXML_CATALOG_ENABLED
#include <libxml/catalog.h>
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
#include <libxml/xmlschemastypes.h>
#include <libxml/relaxng.h>
#endif
#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_ZLIB_H
#include <zlib.h>
#endif
#ifdef HAVE_LZMA_H
#include <lzma.h>
#endif
#include "buf.h"
#include "enc.h"
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
static xmlParserCtxtPtr
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
const xmlChar *base, xmlParserCtxtPtr pctx);
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
/************************************************************************
* *
* Arbitrary limits set in the parser. See XML_PARSE_HUGE *
* *
************************************************************************/
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000
/*
* XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
* replacement over the size in byte of the input indicates that you have
* and eponential behaviour. A value of 10 correspond to at least 3 entity
* replacement per byte of input.
*/
#define XML_PARSER_NON_LINEAR 10
/*
* xmlParserEntityCheck
*
* Function to check non-linear entity expansion behaviour
* This is here to detect and stop exponential linear entity expansion
* This is not a limitation of the parser but a safety
* boundary feature. It can be disabled with the XML_PARSE_HUGE
* parser option.
*/
static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
xmlEntityPtr ent, size_t replacement)
{
size_t consumed = 0;
if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
return (0);
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
return (1);
/*
* This may look absurd but is needed to detect
* entities problems
*/
if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
(ent->content != NULL) && (ent->checked == 0)) {
unsigned long oldnbent = ctxt->nbentities;
xmlChar *rep;
ent->checked = 1;
rep = xmlStringDecodeEntities(ctxt, ent->content,
XML_SUBSTITUTE_REF, 0, 0, 0);
ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
if (rep != NULL) {
if (xmlStrchr(rep, '<'))
ent->checked |= 1;
xmlFree(rep);
rep = NULL;
}
}
if (replacement != 0) {
if (replacement < XML_MAX_TEXT_LENGTH)
return(0);
/*
* If the volume of entity copy reaches 10 times the
* amount of parsed data and over the large text threshold
* then that's very likely to be an abuse.
*/
if (ctxt->input != NULL) {
consumed = ctxt->input->consumed +
(ctxt->input->cur - ctxt->input->base);
}
consumed += ctxt->sizeentities;
if (replacement < XML_PARSER_NON_LINEAR * consumed)
return(0);
} else if (size != 0) {
/*
* Do the check based on the replacement size of the entity
*/
if (size < XML_PARSER_BIG_ENTITY)
return(0);
/*
* A limit on the amount of text data reasonably used
*/
if (ctxt->input != NULL) {
consumed = ctxt->input->consumed +
(ctxt->input->cur - ctxt->input->base);
}
consumed += ctxt->sizeentities;
if ((size < XML_PARSER_NON_LINEAR * consumed) &&
(ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
return (0);
} else if (ent != NULL) {
/*
* use the number of parsed entities in the replacement
*/
size = ent->checked / 2;
/*
* The amount of data parsed counting entities size only once
*/
if (ctxt->input != NULL) {
consumed = ctxt->input->consumed +
(ctxt->input->cur - ctxt->input->base);
}
consumed += ctxt->sizeentities;
/*
* Check the density of entities for the amount of data
* knowing an entity reference will take at least 3 bytes
*/
if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
return (0);
} else {
/*
* strange we got no data for checking
*/
if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
(ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
(ctxt->nbentities <= 10000))
return (0);
}
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
return (1);
}
/**
* xmlParserMaxDepth:
*
* arbitrary depth limit for the XML documents that we allow to
* process. This is not a limitation of the parser but a safety
* boundary feature. It can be disabled with the XML_PARSE_HUGE
* parser option.
*/
unsigned int xmlParserMaxDepth = 256;
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
/**
* XML_PARSER_CHUNK_SIZE
*
* When calling GROW that's the minimal amount of data
* the parser expected to have received. It is not a hard
* limit but an optimization when reading strings like Names
* It is not strictly needed as long as inputs available characters
* are followed by 0, which should be provided by the I/O level
*/
#define XML_PARSER_CHUNK_SIZE 100
/*
* List of XML prefixed PI allowed by W3C specs
*/
static const char *xmlW3CPIs[] = {
"xml-stylesheet",
"xml-model",
NULL
};
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
const xmlChar **str);
static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
xmlSAXHandlerPtr sax,
void *user_data, int depth, const xmlChar *URL,
const xmlChar *ID, xmlNodePtr *list);
static int
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
const char *encoding);
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
const xmlChar *string, void *user_data, xmlNodePtr *lst);
static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
/************************************************************************
* *
* Some factorized error routines *
* *
************************************************************************/
/**
* xmlErrAttributeDup:
* @ctxt: an XML parser context
* @prefix: the attribute prefix
* @localname: the attribute localname
*
* Handle a redefinition of attribute error
*/
static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
const xmlChar * localname)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL)
ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
if (prefix == NULL)
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
(const char *) localname, NULL, NULL, 0, 0,
"Attribute %s redefined\n", localname);
else
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
(const char *) prefix, (const char *) localname,
NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
localname);
if (ctxt != NULL) {
ctxt->wellFormed = 0;
if (ctxt->recovery == 0)
ctxt->disableSAX = 1;
}
}
/**
* xmlFatalErr:
* @ctxt: an XML parser context
* @error: the error number
* @extra: extra information string
*
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
*/
static void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
{
const char *errmsg;
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
switch (error) {
case XML_ERR_INVALID_HEX_CHARREF:
errmsg = "CharRef: invalid hexadecimal value";
break;
case XML_ERR_INVALID_DEC_CHARREF:
errmsg = "CharRef: invalid decimal value";
break;
case XML_ERR_INVALID_CHARREF:
errmsg = "CharRef: invalid value";
break;
case XML_ERR_INTERNAL_ERROR:
errmsg = "internal error";
break;
case XML_ERR_PEREF_AT_EOF:
errmsg = "PEReference at end of document";
break;
case XML_ERR_PEREF_IN_PROLOG:
errmsg = "PEReference in prolog";
break;
case XML_ERR_PEREF_IN_EPILOG:
errmsg = "PEReference in epilog";
break;
case XML_ERR_PEREF_NO_NAME:
errmsg = "PEReference: no name";
break;
case XML_ERR_PEREF_SEMICOL_MISSING:
errmsg = "PEReference: expecting ';'";
break;
case XML_ERR_ENTITY_LOOP:
errmsg = "Detected an entity reference loop";
break;
case XML_ERR_ENTITY_NOT_STARTED:
errmsg = "EntityValue: \" or ' expected";
break;
case XML_ERR_ENTITY_PE_INTERNAL:
errmsg = "PEReferences forbidden in internal subset";
break;
case XML_ERR_ENTITY_NOT_FINISHED:
errmsg = "EntityValue: \" or ' expected";
break;
case XML_ERR_ATTRIBUTE_NOT_STARTED:
errmsg = "AttValue: \" or ' expected";
break;
case XML_ERR_LT_IN_ATTRIBUTE:
errmsg = "Unescaped '<' not allowed in attributes values";
break;
case XML_ERR_LITERAL_NOT_STARTED:
errmsg = "SystemLiteral \" or ' expected";
break;
case XML_ERR_LITERAL_NOT_FINISHED:
errmsg = "Unfinished System or Public ID \" or ' expected";
break;
case XML_ERR_MISPLACED_CDATA_END:
errmsg = "Sequence ']]>' not allowed in content";
break;
case XML_ERR_URI_REQUIRED:
errmsg = "SYSTEM or PUBLIC, the URI is missing";
break;
case XML_ERR_PUBID_REQUIRED:
errmsg = "PUBLIC, the Public Identifier is missing";
break;
case XML_ERR_HYPHEN_IN_COMMENT:
errmsg = "Comment must not contain '--' (double-hyphen)";
break;
case XML_ERR_PI_NOT_STARTED:
errmsg = "xmlParsePI : no target name";
break;
case XML_ERR_RESERVED_XML_NAME:
errmsg = "Invalid PI name";
break;
case XML_ERR_NOTATION_NOT_STARTED:
errmsg = "NOTATION: Name expected here";
break;
case XML_ERR_NOTATION_NOT_FINISHED:
errmsg = "'>' required to close NOTATION declaration";
break;
case XML_ERR_VALUE_REQUIRED:
errmsg = "Entity value required";
break;
case XML_ERR_URI_FRAGMENT:
errmsg = "Fragment not allowed";
break;
case XML_ERR_ATTLIST_NOT_STARTED:
errmsg = "'(' required to start ATTLIST enumeration";
break;
case XML_ERR_NMTOKEN_REQUIRED:
errmsg = "NmToken expected in ATTLIST enumeration";
break;
case XML_ERR_ATTLIST_NOT_FINISHED:
errmsg = "')' required to finish ATTLIST enumeration";
break;
case XML_ERR_MIXED_NOT_STARTED:
errmsg = "MixedContentDecl : '|' or ')*' expected";
break;
case XML_ERR_PCDATA_REQUIRED:
errmsg = "MixedContentDecl : '#PCDATA' expected";
break;
case XML_ERR_ELEMCONTENT_NOT_STARTED:
errmsg = "ContentDecl : Name or '(' expected";
break;
case XML_ERR_ELEMCONTENT_NOT_FINISHED:
errmsg = "ContentDecl : ',' '|' or ')' expected";
break;
case XML_ERR_PEREF_IN_INT_SUBSET:
errmsg =
"PEReference: forbidden within markup decl in internal subset";
break;
case XML_ERR_GT_REQUIRED:
errmsg = "expected '>'";
break;
case XML_ERR_CONDSEC_INVALID:
errmsg = "XML conditional section '[' expected";
break;
case XML_ERR_EXT_SUBSET_NOT_FINISHED:
errmsg = "Content error in the external subset";
break;
case XML_ERR_CONDSEC_INVALID_KEYWORD:
errmsg =
"conditional section INCLUDE or IGNORE keyword expected";
break;
case XML_ERR_CONDSEC_NOT_FINISHED:
errmsg = "XML conditional section not closed";
break;
case XML_ERR_XMLDECL_NOT_STARTED:
errmsg = "Text declaration '<?xml' required";
break;
case XML_ERR_XMLDECL_NOT_FINISHED:
errmsg = "parsing XML declaration: '?>' expected";
break;
case XML_ERR_EXT_ENTITY_STANDALONE:
errmsg = "external parsed entities cannot be standalone";
break;
case XML_ERR_ENTITYREF_SEMICOL_MISSING:
errmsg = "EntityRef: expecting ';'";
break;
case XML_ERR_DOCTYPE_NOT_FINISHED:
errmsg = "DOCTYPE improperly terminated";
break;
case XML_ERR_LTSLASH_REQUIRED:
errmsg = "EndTag: '</' not found";
break;
case XML_ERR_EQUAL_REQUIRED:
errmsg = "expected '='";
break;
case XML_ERR_STRING_NOT_CLOSED:
errmsg = "String not closed expecting \" or '";
break;
case XML_ERR_STRING_NOT_STARTED:
errmsg = "String not started expecting ' or \"";
break;
case XML_ERR_ENCODING_NAME:
errmsg = "Invalid XML encoding name";
break;
case XML_ERR_STANDALONE_VALUE:
errmsg = "standalone accepts only 'yes' or 'no'";
break;
case XML_ERR_DOCUMENT_EMPTY:
errmsg = "Document is empty";
break;
case XML_ERR_DOCUMENT_END:
errmsg = "Extra content at the end of the document";
break;
case XML_ERR_NOT_WELL_BALANCED:
errmsg = "chunk is not well balanced";
break;
case XML_ERR_EXTRA_CONTENT:
errmsg = "extra content at the end of well balanced chunk";
break;
case XML_ERR_VERSION_MISSING:
errmsg = "Malformed declaration expecting version";
break;
case XML_ERR_NAME_TOO_LONG:
errmsg = "Name too long use XML_PARSE_HUGE option";
break;
#if 0
case:
errmsg = "";
break;
#endif
default:
errmsg = "Unregistered error message";
}
if (ctxt != NULL)
ctxt->errNo = error;
if (info == NULL) {
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
errmsg);
} else {
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
errmsg, info);
}
if (ctxt != NULL) {
ctxt->wellFormed = 0;
if (ctxt->recovery == 0)
ctxt->disableSAX = 1;
}
}
/**
* xmlFatalErrMsg:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the error message
*
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
if (ctxt != NULL) {
ctxt->wellFormed = 0;
if (ctxt->recovery == 0)
ctxt->disableSAX = 1;
}
}
/**
* xmlWarningMsg:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the error message
* @str1: extra data
* @str2: extra data
*
* Handle a warning.
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar *str1, const xmlChar *str2)
{
xmlStructuredErrorFunc schannel = NULL;
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if ((ctxt != NULL) && (ctxt->sax != NULL) &&
(ctxt->sax->initialized == XML_SAX2_MAGIC))
schannel = ctxt->sax->serror;
if (ctxt != NULL) {
__xmlRaiseError(schannel,
(ctxt->sax) ? ctxt->sax->warning : NULL,
ctxt->userData,
ctxt, NULL, XML_FROM_PARSER, error,
XML_ERR_WARNING, NULL, 0,
(const char *) str1, (const char *) str2, NULL, 0, 0,
msg, (const char *) str1, (const char *) str2);
} else {
__xmlRaiseError(schannel, NULL, NULL,
ctxt, NULL, XML_FROM_PARSER, error,
XML_ERR_WARNING, NULL, 0,
(const char *) str1, (const char *) str2, NULL, 0, 0,
msg, (const char *) str1, (const char *) str2);
}
}
/**
* xmlValidityError:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the error message
* @str1: extra data
*
* Handle a validity error.
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar *str1, const xmlChar *str2)
{
xmlStructuredErrorFunc schannel = NULL;
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL) {
ctxt->errNo = error;
if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
schannel = ctxt->sax->serror;
}
if (ctxt != NULL) {
__xmlRaiseError(schannel,
ctxt->vctxt.error, ctxt->vctxt.userData,
ctxt, NULL, XML_FROM_DTD, error,
XML_ERR_ERROR, NULL, 0, (const char *) str1,
(const char *) str2, NULL, 0, 0,
msg, (const char *) str1, (const char *) str2);
ctxt->valid = 0;
} else {
__xmlRaiseError(schannel, NULL, NULL,
ctxt, NULL, XML_FROM_DTD, error,
XML_ERR_ERROR, NULL, 0, (const char *) str1,
(const char *) str2, NULL, 0, 0,
msg, (const char *) str1, (const char *) str2);
}
}
/**
* xmlFatalErrMsgInt:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the error message
* @val: an integer value
*
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, int val)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL,
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
if (ctxt != NULL) {
ctxt->wellFormed = 0;
if (ctxt->recovery == 0)
ctxt->disableSAX = 1;
}
}
/**
* xmlFatalErrMsgStrIntStr:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the error message
* @str1: an string info
* @val: an integer value
* @str2: an string info
*
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar *str1, int val,
const xmlChar *str2)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL,
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
NULL, 0, (const char *) str1, (const char *) str2,
NULL, val, 0, msg, str1, val, str2);
if (ctxt != NULL) {
ctxt->wellFormed = 0;
if (ctxt->recovery == 0)
ctxt->disableSAX = 1;
}
}
/**
* xmlFatalErrMsgStr:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the error message
* @val: a string value
*
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar * val)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
XML_FROM_PARSER, error, XML_ERR_FATAL,
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
val);
if (ctxt != NULL) {
ctxt->wellFormed = 0;
if (ctxt->recovery == 0)
ctxt->disableSAX = 1;
}
}
/**
* xmlErrMsgStr:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the error message
* @val: a string value
*
* Handle a non fatal parser error
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar * val)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
XML_FROM_PARSER, error, XML_ERR_ERROR,
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
val);
}
/**
* xmlNsErr:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the message
* @info1: extra information string
* @info2: extra information string
*
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg,
const xmlChar * info1, const xmlChar * info2,
const xmlChar * info3)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
if (ctxt != NULL)
ctxt->errNo = error;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
XML_ERR_ERROR, NULL, 0, (const char *) info1,
(const char *) info2, (const char *) info3, 0, 0, msg,
info1, info2, info3);
if (ctxt != NULL)
ctxt->nsWellFormed = 0;
}
/**
* xmlNsWarn
* @ctxt: an XML parser context
* @error: the error number
* @msg: the message
* @info1: extra information string
* @info2: extra information string
*
* Handle a namespace warning error
*/
static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg,
const xmlChar * info1, const xmlChar * info2,
const xmlChar * info3)
{
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
(ctxt->instate == XML_PARSER_EOF))
return;
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
XML_ERR_WARNING, NULL, 0, (const char *) info1,
(const char *) info2, (const char *) info3, 0, 0, msg,
info1, info2, info3);
}
/************************************************************************
* *
* Library wide options *
* *
************************************************************************/
/**
* xmlHasFeature:
* @feature: the feature to be examined
*
* Examines if the library has been compiled with a given feature.
*
* Returns a non-zero value if the feature exist, otherwise zero.
* Returns zero (0) if the feature does not exist or an unknown
* unknown feature is requested, non-zero otherwise.
*/
int
xmlHasFeature(xmlFeature feature)
{
switch (feature) {
case XML_WITH_THREAD:
#ifdef LIBXML_THREAD_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_TREE:
#ifdef LIBXML_TREE_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_OUTPUT:
#ifdef LIBXML_OUTPUT_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_PUSH:
#ifdef LIBXML_PUSH_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_READER:
#ifdef LIBXML_READER_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_PATTERN:
#ifdef LIBXML_PATTERN_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_WRITER:
#ifdef LIBXML_WRITER_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_SAX1:
#ifdef LIBXML_SAX1_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_FTP:
#ifdef LIBXML_FTP_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_HTTP:
#ifdef LIBXML_HTTP_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_VALID:
#ifdef LIBXML_VALID_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_HTML:
#ifdef LIBXML_HTML_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_LEGACY:
#ifdef LIBXML_LEGACY_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_C14N:
#ifdef LIBXML_C14N_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_CATALOG:
#ifdef LIBXML_CATALOG_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_XPATH:
#ifdef LIBXML_XPATH_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_XPTR:
#ifdef LIBXML_XPTR_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_XINCLUDE:
#ifdef LIBXML_XINCLUDE_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_ICONV:
#ifdef LIBXML_ICONV_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_ISO8859X:
#ifdef LIBXML_ISO8859X_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_UNICODE:
#ifdef LIBXML_UNICODE_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_REGEXP:
#ifdef LIBXML_REGEXP_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_AUTOMATA:
#ifdef LIBXML_AUTOMATA_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_EXPR:
#ifdef LIBXML_EXPR_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_SCHEMAS:
#ifdef LIBXML_SCHEMAS_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_SCHEMATRON:
#ifdef LIBXML_SCHEMATRON_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_MODULES:
#ifdef LIBXML_MODULES_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_DEBUG:
#ifdef LIBXML_DEBUG_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_DEBUG_MEM:
#ifdef DEBUG_MEMORY_LOCATION
return(1);
#else
return(0);
#endif
case XML_WITH_DEBUG_RUN:
#ifdef LIBXML_DEBUG_RUNTIME
return(1);
#else
return(0);
#endif
case XML_WITH_ZLIB:
#ifdef LIBXML_ZLIB_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_LZMA:
#ifdef LIBXML_LZMA_ENABLED
return(1);
#else
return(0);
#endif
case XML_WITH_ICU:
#ifdef LIBXML_ICU_ENABLED
return(1);
#else
return(0);
#endif
default:
break;
}
return(0);
}
/************************************************************************
* *
* SAX2 defaulted attributes handling *
* *
************************************************************************/
/**
* xmlDetectSAX2:
* @ctxt: an XML parser context
*
* Do the SAX2 detection and specific intialization
*/
static void
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
if (ctxt == NULL) return;
#ifdef LIBXML_SAX1_ENABLED
if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
((ctxt->sax->startElementNs != NULL) ||
(ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
#else
ctxt->sax2 = 1;
#endif /* LIBXML_SAX1_ENABLED */
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
(ctxt->str_xml_ns == NULL)) {
xmlErrMemory(ctxt, NULL);
}
}
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
struct _xmlDefAttrs {
int nbAttrs; /* number of defaulted attributes on that element */
int maxAttrs; /* the size of the array */
const xmlChar *values[5]; /* array of localname/prefix/values/external */
};
/**
* xmlAttrNormalizeSpace:
* @src: the source string
* @dst: the target string
*
* Normalize the space in non CDATA attribute values:
* If the attribute type is not CDATA, then the XML processor MUST further
* process the normalized attribute value by discarding any leading and
* trailing space (#x20) characters, and by replacing sequences of space
* (#x20) characters by a single space (#x20) character.
* Note that the size of dst need to be at least src, and if one doesn't need
* to preserve dst (and it doesn't come from a dictionary or read-only) then
* passing src as dst is just fine.
*
* Returns a pointer to the normalized value (dst) or NULL if no conversion
* is needed.
*/
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
if ((src == NULL) || (dst == NULL))
return(NULL);
while (*src == 0x20) src++;
while (*src != 0) {
if (*src == 0x20) {
while (*src == 0x20) src++;
if (*src != 0)
*dst++ = 0x20;
} else {
*dst++ = *src++;
}
}
*dst = 0;
if (dst == src)
return(NULL);
return(dst);
}
/**
* xmlAttrNormalizeSpace2:
* @src: the source string
*
* Normalize the space in non CDATA attribute values, a slightly more complex
* front end to avoid allocation problems when running on attribute values
* coming from the input.
*
* Returns a pointer to the normalized value (dst) or NULL if no conversion
* is needed.
*/
static const xmlChar *
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
{
int i;
int remove_head = 0;
int need_realloc = 0;
const xmlChar *cur;
if ((ctxt == NULL) || (src == NULL) || (len == NULL))
return(NULL);
i = *len;
if (i <= 0)
return(NULL);
cur = src;
while (*cur == 0x20) {
cur++;
remove_head++;
}
while (*cur != 0) {
if (*cur == 0x20) {
cur++;
if ((*cur == 0x20) || (*cur == 0)) {
need_realloc = 1;
break;
}
} else
cur++;
}
if (need_realloc) {
xmlChar *ret;
ret = xmlStrndup(src + remove_head, i - remove_head + 1);
if (ret == NULL) {
xmlErrMemory(ctxt, NULL);
return(NULL);
}
xmlAttrNormalizeSpace(ret, ret);
*len = (int) XML_STRLEN((const char *)ret);
return(ret);
} else if (remove_head) {
*len -= remove_head;
XML_MEMMOVE(src, src + remove_head, 1 + *len);
return(src);
}
return(NULL);
}
/**
* xmlAddDefAttrs:
* @ctxt: an XML parser context
* @fullname: the element fullname
* @fullattr: the attribute fullname
* @value: the attribute value
*
* Add a defaulted attribute for an element
*/
static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
const xmlChar *fullname,
const xmlChar *fullattr,
const xmlChar *value) {
xmlDefAttrsPtr defaults;
int len;
const xmlChar *name;
const xmlChar *prefix;
/*
* Allows to detect attribute redefinitions
*/
if (ctxt->attsSpecial != NULL) {
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
return;
}
if (ctxt->attsDefault == NULL) {
ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
if (ctxt->attsDefault == NULL)
goto mem_error;
}
/*
* split the element name into prefix:localname , the string found
* are within the DTD and then not associated to namespace names.
*/
name = xmlSplitQName3(fullname, &len);
if (name == NULL) {
name = xmlDictLookup(ctxt->dict, fullname, -1);
prefix = NULL;
} else {
name = xmlDictLookup(ctxt->dict, name, -1);
prefix = xmlDictLookup(ctxt->dict, fullname, len);
}
/*
* make sure there is some storage
*/
defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
if (defaults == NULL) {
defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
(4 * 5) * sizeof(const xmlChar *));
if (defaults == NULL)
goto mem_error;
defaults->nbAttrs = 0;
defaults->maxAttrs = 4;
if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
defaults, NULL) < 0) {
xmlFree(defaults);
goto mem_error;
}
} else if (defaults->nbAttrs >= defaults->maxAttrs) {
xmlDefAttrsPtr temp;
temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
(2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
if (temp == NULL)
goto mem_error;
defaults = temp;
defaults->maxAttrs *= 2;
if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
defaults, NULL) < 0) {
xmlFree(defaults);
goto mem_error;
}
}
/*
* Split the element name into prefix:localname , the string found
* are within the DTD and hen not associated to namespace names.
*/
name = xmlSplitQName3(fullattr, &len);
if (name == NULL) {
name = xmlDictLookup(ctxt->dict, fullattr, -1);
prefix = NULL;
} else {
name = xmlDictLookup(ctxt->dict, name, -1);
prefix = xmlDictLookup(ctxt->dict, fullattr, len);
}
defaults->values[5 * defaults->nbAttrs] = name;
defaults->values[5 * defaults->nbAttrs + 1] = prefix;
/* intern the string and precompute the end */
len = xmlStrlen(value);
value = xmlDictLookup(ctxt->dict, value, len);
defaults->values[5 * defaults->nbAttrs + 2] = value;
defaults->values[5 * defaults->nbAttrs + 3] = value + len;
if (ctxt->external)
defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
else
defaults->values[5 * defaults->nbAttrs + 4] = NULL;
defaults->nbAttrs++;
return;
mem_error:
xmlErrMemory(ctxt, NULL);
return;
}
/**
* xmlAddSpecialAttr:
* @ctxt: an XML parser context
* @fullname: the element fullname
* @fullattr: the attribute fullname
* @type: the attribute type
*
* Register this attribute type
*/
static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
const xmlChar *fullname,
const xmlChar *fullattr,
int type)
{
if (ctxt->attsSpecial == NULL) {
ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
if (ctxt->attsSpecial == NULL)
goto mem_error;
}
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
return;
xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
(void *) (long) type);
return;
mem_error:
xmlErrMemory(ctxt, NULL);
return;
}
/**
* xmlCleanSpecialAttrCallback:
*
* Removes CDATA attributes from the special attribute table
*/
static void
xmlCleanSpecialAttrCallback(void *payload, void *data,
const xmlChar *fullname, const xmlChar *fullattr,
const xmlChar *unused ATTRIBUTE_UNUSED) {
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
if (((long) payload) == XML_ATTRIBUTE_CDATA) {
xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
}
}
/**
* xmlCleanSpecialAttr:
* @ctxt: an XML parser context
*
* Trim the list of attributes defined to remove all those of type
* CDATA as they are not special. This call should be done when finishing
* to parse the DTD and before starting to parse the document root.
*/
static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
{
if (ctxt->attsSpecial == NULL)
return;
xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
if (xmlHashSize(ctxt->attsSpecial) == 0) {
xmlHashFree(ctxt->attsSpecial, NULL);
ctxt->attsSpecial = NULL;
}
return;
}
/**
* xmlCheckLanguageID:
* @lang: pointer to the string value
*
* Checks that the value conforms to the LanguageID production:
*
* NOTE: this is somewhat deprecated, those productions were removed from
* the XML Second edition.
*
* [33] LanguageID ::= Langcode ('-' Subcode)*
* [34] Langcode ::= ISO639Code | IanaCode | UserCode
* [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
* [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
* [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
* [38] Subcode ::= ([a-z] | [A-Z])+
*
* The current REC reference the sucessors of RFC 1766, currently 5646
*
* http://www.rfc-editor.org/rfc/rfc5646.txt
* langtag = language
* ["-" script]
* ["-" region]
* *("-" variant)
* *("-" extension)
* ["-" privateuse]
* language = 2*3ALPHA ; shortest ISO 639 code
* ["-" extlang] ; sometimes followed by
* ; extended language subtags
* / 4ALPHA ; or reserved for future use
* / 5*8ALPHA ; or registered language subtag
*
* extlang = 3ALPHA ; selected ISO 639 codes
* *2("-" 3ALPHA) ; permanently reserved
*
* script = 4ALPHA ; ISO 15924 code
*
* region = 2ALPHA ; ISO 3166-1 code
* / 3DIGIT ; UN M.49 code
*
* variant = 5*8alphanum ; registered variants
* / (DIGIT 3alphanum)
*
* extension = singleton 1*("-" (2*8alphanum))
*
* ; Single alphanumerics
* ; "x" reserved for private use
* singleton = DIGIT ; 0 - 9
* / %x41-57 ; A - W
* / %x59-5A ; Y - Z
* / %x61-77 ; a - w
* / %x79-7A ; y - z
*
* it sounds right to still allow Irregular i-xxx IANA and user codes too
* The parser below doesn't try to cope with extension or privateuse
* that could be added but that's not interoperable anyway
*
* Returns 1 if correct 0 otherwise
**/
int
xmlCheckLanguageID(const xmlChar * lang)
{
const xmlChar *cur = lang, *nxt;
if (cur == NULL)
return (0);
if (((cur[0] == 'i') && (cur[1] == '-')) ||
((cur[0] == 'I') && (cur[1] == '-')) ||
((cur[0] == 'x') && (cur[1] == '-')) ||
((cur[0] == 'X') && (cur[1] == '-'))) {
/*
* Still allow IANA code and user code which were coming
* from the previous version of the XML-1.0 specification
* it's deprecated but we should not fail
*/
cur += 2;
while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
((cur[0] >= 'a') && (cur[0] <= 'z')))
cur++;
return(cur[0] == 0);
}
nxt = cur;
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
nxt++;
if (nxt - cur >= 4) {
/*
* Reserved
*/
if ((nxt - cur > 8) || (nxt[0] != 0))
return(0);
return(1);
}
if (nxt - cur < 2)
return(0);
/* we got an ISO 639 code */
if (nxt[0] == 0)
return(1);
if (nxt[0] != '-')
return(0);
nxt++;
cur = nxt;
/* now we can have extlang or script or region or variant */
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
goto region_m49;
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
nxt++;
if (nxt - cur == 4)
goto script;
if (nxt - cur == 2)
goto region;
if ((nxt - cur >= 5) && (nxt - cur <= 8))
goto variant;
if (nxt - cur != 3)
return(0);
/* we parsed an extlang */
if (nxt[0] == 0)
return(1);
if (nxt[0] != '-')
return(0);
nxt++;
cur = nxt;
/* now we can have script or region or variant */
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
goto region_m49;
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
nxt++;
if (nxt - cur == 2)
goto region;
if ((nxt - cur >= 5) && (nxt - cur <= 8))
goto variant;
if (nxt - cur != 4)
return(0);
/* we parsed a script */
script:
if (nxt[0] == 0)
return(1);
if (nxt[0] != '-')
return(0);
nxt++;
cur = nxt;
/* now we can have region or variant */
if ((nxt[0] >= '0') && (nxt[0] <= '9'))
goto region_m49;
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
nxt++;
if ((nxt - cur >= 5) && (nxt - cur <= 8))
goto variant;
if (nxt - cur != 2)
return(0);
/* we parsed a region */
region:
if (nxt[0] == 0)
return(1);
if (nxt[0] != '-')
return(0);
nxt++;
cur = nxt;
/* now we can just have a variant */
while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
((nxt[0] >= 'a') && (nxt[0] <= 'z')))
nxt++;
if ((nxt - cur < 5) || (nxt - cur > 8))
return(0);
/* we parsed a variant */
variant:
if (nxt[0] == 0)
return(1);
if (nxt[0] != '-')
return(0);
/* extensions and private use subtags not checked */
return (1);
region_m49:
if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
((nxt[2] >= '0') && (nxt[2] <= '9'))) {
nxt += 3;
goto region;
}
return(0);
}
/************************************************************************
* *
* Parser stacks related functions and macros *
* *
************************************************************************/
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
const xmlChar ** str);
#ifdef SAX2
/**
* nsPush:
* @ctxt: an XML parser context
* @prefix: the namespace prefix or NULL
* @URL: the namespace name
*
* Pushes a new parser namespace on top of the ns stack
*
* Returns -1 in case of error, -2 if the namespace should be discarded
* and the index in the stack otherwise.
*/
static int
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
{
if (ctxt->options & XML_PARSE_NSCLEAN) {
int i;
for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
if (ctxt->nsTab[i] == prefix) {
/* in scope */
if (ctxt->nsTab[i + 1] == URL)
return(-2);
/* out of scope keep it */
break;
}
}
}
if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
ctxt->nsMax = 10;
ctxt->nsNr = 0;
ctxt->nsTab = (const xmlChar **)
xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
if (ctxt->nsTab == NULL) {
xmlErrMemory(ctxt, NULL);
ctxt->nsMax = 0;
return (-1);
}
} else if (ctxt->nsNr >= ctxt->nsMax) {
const xmlChar ** tmp;
ctxt->nsMax *= 2;
tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
ctxt->nsMax * sizeof(ctxt->nsTab[0]));
if (tmp == NULL) {
xmlErrMemory(ctxt, NULL);
ctxt->nsMax /= 2;
return (-1);
}
ctxt->nsTab = tmp;
}
ctxt->nsTab[ctxt->nsNr++] = prefix;
ctxt->nsTab[ctxt->nsNr++] = URL;
return (ctxt->nsNr);
}
/**
* nsPop:
* @ctxt: an XML parser context
* @nr: the number to pop
*
* Pops the top @nr parser prefix/namespace from the ns stack
*
* Returns the number of namespaces removed
*/
static int
nsPop(xmlParserCtxtPtr ctxt, int nr)
{
int i;
if (ctxt->nsTab == NULL) return(0);
if (ctxt->nsNr < nr) {
xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
nr = ctxt->nsNr;
}
if (ctxt->nsNr <= 0)
return (0);
for (i = 0;i < nr;i++) {
ctxt->nsNr--;
ctxt->nsTab[ctxt->nsNr] = NULL;
}
return(nr);
}
#endif
static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
const xmlChar **atts;
int *attallocs;
int maxatts;
if (ctxt->atts == NULL) {
maxatts = 55; /* allow for 10 attrs by default */
atts = (const xmlChar **)
xmlMalloc(maxatts * sizeof(xmlChar *));
if (atts == NULL) goto mem_error;
ctxt->atts = atts;
attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
if (attallocs == NULL) goto mem_error;
ctxt->attallocs = attallocs;
ctxt->maxatts = maxatts;
} else if (nr + 5 > ctxt->maxatts) {
maxatts = (nr + 5) * 2;
atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
maxatts * sizeof(const xmlChar *));
if (atts == NULL) goto mem_error;
ctxt->atts = atts;
attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
(maxatts / 5) * sizeof(int));
if (attallocs == NULL) goto mem_error;
ctxt->attallocs = attallocs;
ctxt->maxatts = maxatts;
}
return(ctxt->maxatts);
mem_error:
xmlErrMemory(ctxt, NULL);
return(-1);
}
/**
* inputPush:
* @ctxt: an XML parser context
* @value: the parser input
*
* Pushes a new parser input on top of the input stack
*
* Returns -1 in case of error, the index in the stack otherwise
*/
int
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
{
if ((ctxt == NULL) || (value == NULL))
return(-1);
if (ctxt->inputNr >= ctxt->inputMax) {
ctxt->inputMax *= 2;
ctxt->inputTab =
(xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
ctxt->inputMax *
sizeof(ctxt->inputTab[0]));
if (ctxt->inputTab == NULL) {
xmlErrMemory(ctxt, NULL);
xmlFreeInputStream(value);
ctxt->inputMax /= 2;
value = NULL;
return (-1);
}
}
ctxt->inputTab[ctxt->inputNr] = value;
ctxt->input = value;
return (ctxt->inputNr++);
}
/**
* inputPop:
* @ctxt: an XML parser context
*
* Pops the top parser input from the input stack
*
* Returns the input just removed
*/
xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)
{
xmlParserInputPtr ret;
if (ctxt == NULL)
return(NULL);
if (ctxt->inputNr <= 0)
return (NULL);
ctxt->inputNr--;
if (ctxt->inputNr > 0)
ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
else
ctxt->input = NULL;
ret = ctxt->inputTab[ctxt->inputNr];
ctxt->inputTab[ctxt->inputNr] = NULL;
return (ret);
}
/**
* nodePush:
* @ctxt: an XML parser context
* @value: the element node
*
* Pushes a new element node on top of the node stack
*
* Returns -1 in case of error, the index in the stack otherwise
*/
int
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
{
if (ctxt == NULL) return(0);
if (ctxt->nodeNr >= ctxt->nodeMax) {
xmlNodePtr *tmp;
tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
ctxt->nodeMax * 2 *
sizeof(ctxt->nodeTab[0]));
if (tmp == NULL) {
xmlErrMemory(ctxt, NULL);
return (-1);
}
ctxt->nodeTab = tmp;
ctxt->nodeMax *= 2;
}
if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
((ctxt->options & XML_PARSE_HUGE) == 0)) {
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
"Excessive depth in document: %d use XML_PARSE_HUGE option\n",
xmlParserMaxDepth);
ctxt->instate = XML_PARSER_EOF;
return(-1);
}
ctxt->nodeTab[ctxt->nodeNr] = value;
ctxt->node = value;
return (ctxt->nodeNr++);
}
/**
* nodePop:
* @ctxt: an XML parser context
*
* Pops the top element node from the node stack
*
* Returns the node just removed
*/
xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)
{
xmlNodePtr ret;
if (ctxt == NULL) return(NULL);
if (ctxt->nodeNr <= 0)
return (NULL);
ctxt->nodeNr--;
if (ctxt->nodeNr > 0)
ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
else
ctxt->node = NULL;
ret = ctxt->nodeTab[ctxt->nodeNr];
ctxt->nodeTab[ctxt->nodeNr] = NULL;
return (ret);
}
#ifdef LIBXML_PUSH_ENABLED
/**
* nameNsPush:
* @ctxt: an XML parser context
* @value: the element name
* @prefix: the element prefix
* @URI: the element namespace name
*
* Pushes a new element name/prefix/URL on top of the name stack
*
* Returns -1 in case of error, the index in the stack otherwise
*/
static int
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
const xmlChar *prefix, const xmlChar *URI, int nsNr)
{
if (ctxt->nameNr >= ctxt->nameMax) {
const xmlChar * *tmp;
void **tmp2;
ctxt->nameMax *= 2;
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
ctxt->nameMax *
sizeof(ctxt->nameTab[0]));
if (tmp == NULL) {
ctxt->nameMax /= 2;
goto mem_error;
}
ctxt->nameTab = tmp;
tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
ctxt->nameMax * 3 *
sizeof(ctxt->pushTab[0]));
if (tmp2 == NULL) {
ctxt->nameMax /= 2;
goto mem_error;
}
ctxt->pushTab = tmp2;
}
ctxt->nameTab[ctxt->nameNr] = value;
ctxt->name = value;
ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
return (ctxt->nameNr++);
mem_error:
xmlErrMemory(ctxt, NULL);
return (-1);
}
/**
* nameNsPop:
* @ctxt: an XML parser context
*
* Pops the top element/prefix/URI name from the name stack
*
* Returns the name just removed
*/
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
const xmlChar *ret;
if (ctxt->nameNr <= 0)
return (NULL);
ctxt->nameNr--;
if (ctxt->nameNr > 0)
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
else
ctxt->name = NULL;
ret = ctxt->nameTab[ctxt->nameNr];
ctxt->nameTab[ctxt->nameNr] = NULL;
return (ret);
}
#endif /* LIBXML_PUSH_ENABLED */
/**
* namePush:
* @ctxt: an XML parser context
* @value: the element name
*
* Pushes a new element name on top of the name stack
*
* Returns -1 in case of error, the index in the stack otherwise
*/
int
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
{
if (ctxt == NULL) return (-1);
if (ctxt->nameNr >= ctxt->nameMax) {
const xmlChar * *tmp;
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
ctxt->nameMax * 2 *
sizeof(ctxt->nameTab[0]));
if (tmp == NULL) {
goto mem_error;
}
ctxt->nameTab = tmp;
ctxt->nameMax *= 2;
}
ctxt->nameTab[ctxt->nameNr] = value;
ctxt->name = value;
return (ctxt->nameNr++);
mem_error:
xmlErrMemory(ctxt, NULL);
return (-1);
}
/**
* namePop:
* @ctxt: an XML parser context
*
* Pops the top element name from the name stack
*
* Returns the name just removed
*/
const xmlChar *
namePop(xmlParserCtxtPtr ctxt)
{
const xmlChar *ret;
if ((ctxt == NULL) || (ctxt->nameNr <= 0))
return (NULL);
ctxt->nameNr--;
if (ctxt->nameNr > 0)
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
else
ctxt->name = NULL;
ret = ctxt->nameTab[ctxt->nameNr];
ctxt->nameTab[ctxt->nameNr] = NULL;
return (ret);
}
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
if (ctxt->spaceNr >= ctxt->spaceMax) {
int *tmp;
ctxt->spaceMax *= 2;
tmp = (int *) xmlRealloc(ctxt->spaceTab,
ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
if (tmp == NULL) {
xmlErrMemory(ctxt, NULL);
ctxt->spaceMax /=2;
return(-1);
}
ctxt->spaceTab = tmp;
}
ctxt->spaceTab[ctxt->spaceNr] = val;
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
return(ctxt->spaceNr++);
}
static int spacePop(xmlParserCtxtPtr ctxt) {
int ret;
if (ctxt->spaceNr <= 0) return(0);
ctxt->spaceNr--;
if (ctxt->spaceNr > 0)
ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
else
ctxt->space = &ctxt->spaceTab[0];
ret = ctxt->spaceTab[ctxt->spaceNr];
ctxt->spaceTab[ctxt->spaceNr] = -1;
return(ret);
}
/*
* Macros for accessing the content. Those should be used only by the parser,
* and not exported.
*
* Dirty macros, i.e. one often need to make assumption on the context to
* use them
*
* CUR_PTR return the current pointer to the xmlChar to be parsed.
* To be used with extreme caution since operations consuming
* characters may move the input buffer to a different location !
* CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
* This should be used internally by the parser
* only to compare to ASCII values otherwise it would break when
* running with UTF-8 encoding.
* RAW same as CUR but in the input buffer, bypass any token
* extraction that may have been done
* NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only
* to compare on ASCII based substring.
* SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
* strings without newlines within the parser.
* NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
* defined char within the parser.
* Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
*
* NEXT Skip to the next character, this does the proper decoding
* in UTF-8 mode. It also pop-up unfinished entities on the fly.
* NEXTL(l) Skip the current unicode character of l xmlChars long.
* CUR_CHAR(l) returns the current unicode character (int), set l
* to the number of xmlChars used for the encoding [0-5].
* CUR_SCHAR same but operate on a string instead of the context
* COPY_BUF copy the current unicode char to the target buffer, increment
* the index
* GROW, SHRINK handling of input buffers
*/
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define CMP4( s, c1, c2, c3, c4 ) \
( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
((unsigned char *) s)[ 9 ] == c10 )
#define SKIP(val) do { \
ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
if ((*ctxt->input->cur == 0) && \
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
xmlPopInput(ctxt); \
} while (0)
#define SKIPL(val) do { \
int skipl; \
for(skipl=0; skipl<val; skipl++) { \
if (*(ctxt->input->cur) == '\n') { \
ctxt->input->line++; ctxt->input->col = 1; \
} else ctxt->input->col++; \
ctxt->nbChars++; \
ctxt->input->cur++; \
} \
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
if ((*ctxt->input->cur == 0) && \
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
xmlPopInput(ctxt); \
} while (0)
#define SHRINK if ((ctxt->progressive == 0) && \
(ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
xmlSHRINK (ctxt);
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
xmlParserInputShrink(ctxt->input);
if ((*ctxt->input->cur == 0) &&
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
xmlPopInput(ctxt);
}
#define GROW if ((ctxt->progressive == 0) && \
(ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
xmlGROW (ctxt);
static void xmlGROW (xmlParserCtxtPtr ctxt) {
unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
unsigned long curBase = ctxt->input->cur - ctxt->input->base;
if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
(curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
((ctxt->options & XML_PARSE_HUGE) == 0)) {
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
xmlHaltParser(ctxt);
return;
}
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
if ((ctxt->input->cur > ctxt->input->end) ||
(ctxt->input->cur < ctxt->input->base)) {
xmlHaltParser(ctxt);
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
return;
}
if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
xmlPopInput(ctxt);
}
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
#define NEXT xmlNextChar(ctxt)
#define NEXT1 { \
ctxt->input->col++; \
ctxt->input->cur++; \
ctxt->nbChars++; \
if (*ctxt->input->cur == 0) \
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
}
#define NEXTL(l) do { \
if (*(ctxt->input->cur) == '\n') { \
ctxt->input->line++; ctxt->input->col = 1; \
} else ctxt->input->col++; \
ctxt->input->cur += l; \
} while (0)
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
#define COPY_BUF(l,b,i,v) \
if (l == 1) b[i++] = (xmlChar) v; \
else i += xmlCopyCharMultiByte(&b[i],v)
/**
* xmlSkipBlankChars:
* @ctxt: the XML parser context
*
* skip all blanks character found at that point in the input streams.
* It pops up finished entities in the process if allowable at that point.
*
* Returns the number of space chars skipped
*/
int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
int res = 0;
/*
* It's Okay to use CUR/NEXT here since all the blanks are on
* the ASCII range.
*/
if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
const xmlChar *cur;
/*
* if we are in the document content, go really fast
*/
cur = ctxt->input->cur;
while (IS_BLANK_CH(*cur)) {
if (*cur == '\n') {
ctxt->input->line++; ctxt->input->col = 1;
} else {
ctxt->input->col++;
}
cur++;
res++;
if (*cur == 0) {
ctxt->input->cur = cur;
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
cur = ctxt->input->cur;
}
}
ctxt->input->cur = cur;
} else {
int cur;
do {
cur = CUR;
while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
NEXT;
cur = CUR;
res++;
}
while ((cur == 0) && (ctxt->inputNr > 1) &&
(ctxt->instate != XML_PARSER_COMMENT)) {
xmlPopInput(ctxt);
cur = CUR;
}
/*
* Need to handle support of entities branching here
*/
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
} while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
}
return(res);
}
/************************************************************************
* *
* Commodity functions to handle entities *
* *
************************************************************************/
/**
* xmlPopInput:
* @ctxt: an XML parser context
*
* xmlPopInput: the current input pointed by ctxt->input came to an end
* pop it and return the next char.
*
* Returns the current xmlChar in the parser context
*/
xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt) {
if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
if (xmlParserDebugEntities)
xmlGenericError(xmlGenericErrorContext,
"Popping input %d\n", ctxt->inputNr);
xmlFreeInputStream(inputPop(ctxt));
if ((*ctxt->input->cur == 0) &&
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
return(xmlPopInput(ctxt));
return(CUR);
}
/**
* xmlPushInput:
* @ctxt: an XML parser context
* @input: an XML parser input fragment (entity, XML fragment ...).
*
* xmlPushInput: switch to a new input stream which is stacked on top
* of the previous one(s).
* Returns -1 in case of error or the index in the input stack
*/
int
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
int ret;
if (input == NULL) return(-1);
if (xmlParserDebugEntities) {
if ((ctxt->input != NULL) && (ctxt->input->filename))
xmlGenericError(xmlGenericErrorContext,
"%s(%d): ", ctxt->input->filename,
ctxt->input->line);
xmlGenericError(xmlGenericErrorContext,
"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
}
ret = inputPush(ctxt, input);
if (ctxt->instate == XML_PARSER_EOF)
return(-1);
GROW;
return(ret);
}
/**
* xmlParseCharRef:
* @ctxt: an XML parser context
*
* parse Reference declarations
*
* [66] CharRef ::= '&#' [0-9]+ ';' |
* '&#x' [0-9a-fA-F]+ ';'
*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
* production for Char.
*
* Returns the value parsed (as an int), 0 in case of error
*/
int
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
unsigned int val = 0;
int count = 0;
unsigned int outofrange = 0;
/*
* Using RAW/CUR/NEXT is okay since we are working on ASCII range here
*/
if ((RAW == '&') && (NXT(1) == '#') &&
(NXT(2) == 'x')) {
SKIP(3);
GROW;
while (RAW != ';') { /* loop blocked by count */
if (count++ > 20) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF)
return(0);
}
if ((RAW >= '0') && (RAW <= '9'))
val = val * 16 + (CUR - '0');
else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
val = val * 16 + (CUR - 'a') + 10;
else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
val = val * 16 + (CUR - 'A') + 10;
else {
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
val = 0;
break;
}
if (val > 0x10FFFF)
outofrange = val;
NEXT;
count++;
}
if (RAW == ';') {
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
ctxt->input->col++;
ctxt->nbChars ++;
ctxt->input->cur++;
}
} else if ((RAW == '&') && (NXT(1) == '#')) {
SKIP(2);
GROW;
while (RAW != ';') { /* loop blocked by count */
if (count++ > 20) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF)
return(0);
}
if ((RAW >= '0') && (RAW <= '9'))
val = val * 10 + (CUR - '0');
else {
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
val = 0;
break;
}
if (val > 0x10FFFF)
outofrange = val;
NEXT;
count++;
}
if (RAW == ';') {
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
ctxt->input->col++;
ctxt->nbChars ++;
ctxt->input->cur++;
}
} else {
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
}
/*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
* production for Char.
*/
if ((IS_CHAR(val) && (outofrange == 0))) {
return(val);
} else {
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
"xmlParseCharRef: invalid xmlChar value %d\n",
val);
}
return(0);
}
/**
* xmlParseStringCharRef:
* @ctxt: an XML parser context
* @str: a pointer to an index in the string
*
* parse Reference declarations, variant parsing from a string rather
* than an an input flow.
*
* [66] CharRef ::= '&#' [0-9]+ ';' |
* '&#x' [0-9a-fA-F]+ ';'
*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
* production for Char.
*
* Returns the value parsed (as an int), 0 in case of error, str will be
* updated to the current value of the index
*/
static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
const xmlChar *ptr;
xmlChar cur;
unsigned int val = 0;
unsigned int outofrange = 0;
if ((str == NULL) || (*str == NULL)) return(0);
ptr = *str;
cur = *ptr;
if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
ptr += 3;
cur = *ptr;
while (cur != ';') { /* Non input consuming loop */
if ((cur >= '0') && (cur <= '9'))
val = val * 16 + (cur - '0');
else if ((cur >= 'a') && (cur <= 'f'))
val = val * 16 + (cur - 'a') + 10;
else if ((cur >= 'A') && (cur <= 'F'))
val = val * 16 + (cur - 'A') + 10;
else {
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
val = 0;
break;
}
if (val > 0x10FFFF)
outofrange = val;
ptr++;
cur = *ptr;
}
if (cur == ';')
ptr++;
} else if ((cur == '&') && (ptr[1] == '#')){
ptr += 2;
cur = *ptr;
while (cur != ';') { /* Non input consuming loops */
if ((cur >= '0') && (cur <= '9'))
val = val * 10 + (cur - '0');
else {
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
val = 0;
break;
}
if (val > 0x10FFFF)
outofrange = val;
ptr++;
cur = *ptr;
}
if (cur == ';')
ptr++;
} else {
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
return(0);
}
*str = ptr;
/*
* [ WFC: Legal Character ]
* Characters referred to using character references must match the
* production for Char.
*/
if ((IS_CHAR(val) && (outofrange == 0))) {
return(val);
} else {
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
"xmlParseStringCharRef: invalid xmlChar value %d\n",
val);
}
return(0);
}
/**
* xmlNewBlanksWrapperInputStream:
* @ctxt: an XML parser context
* @entity: an Entity pointer
*
* Create a new input stream for wrapping
* blanks around a PEReference
*
* Returns the new input stream or NULL
*/
static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
static xmlParserInputPtr
xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
xmlParserInputPtr input;
xmlChar *buffer;
size_t length;
if (entity == NULL) {
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
"xmlNewBlanksWrapperInputStream entity\n");
return(NULL);
}
if (xmlParserDebugEntities)
xmlGenericError(xmlGenericErrorContext,
"new blanks wrapper for entity: %s\n", entity->name);
input = xmlNewInputStream(ctxt);
if (input == NULL) {
return(NULL);
}
length = xmlStrlen(entity->name) + 5;
buffer = xmlMallocAtomic(length);
if (buffer == NULL) {
xmlErrMemory(ctxt, NULL);
xmlFree(input);
return(NULL);
}
buffer [0] = ' ';
buffer [1] = '%';
buffer [length-3] = ';';
buffer [length-2] = ' ';
buffer [length-1] = 0;
XML_MEMCPY(buffer + 2, entity->name, length - 5);
input->free = deallocblankswrapper;
input->base = buffer;
input->cur = buffer;
input->length = length;
input->end = &buffer[length];
return(input);
}
/**
* xmlParserHandlePEReference:
* @ctxt: the parser context
*
* [69] PEReference ::= '%' Name ';'
*
* [ WFC: No Recursion ]
* A parsed entity must not contain a recursive
* reference to itself, either directly or indirectly.
*
* [ WFC: Entity Declared ]
* In a document without any DTD, a document with only an internal DTD
* subset which contains no parameter entity references, or a document
* with "standalone='yes'", ... ... The declaration of a parameter
* entity must precede any reference to it...
*
* [ VC: Entity Declared ]
* In a document with an external subset or external parameter entities
* with "standalone='no'", ... ... The declaration of a parameter entity
* must precede any reference to it...
*
* [ WFC: In DTD ]
* Parameter-entity references may only appear in the DTD.
* NOTE: misleading but this is handled.
*
* A PEReference may have been detected in the current input stream
* the handling is done accordingly to
* http://www.w3.org/TR/REC-xml#entproc
* i.e.
* - Included in literal in entity values
* - Included as Parameter Entity reference within DTDs
*/
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
const xmlChar *name;
xmlEntityPtr entity = NULL;
xmlParserInputPtr input;
if (RAW != '%') return;
switch(ctxt->instate) {
case XML_PARSER_CDATA_SECTION:
return;
case XML_PARSER_COMMENT:
return;
case XML_PARSER_START_TAG:
return;
case XML_PARSER_END_TAG:
return;
case XML_PARSER_EOF:
xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
return;
case XML_PARSER_PROLOG:
case XML_PARSER_START:
case XML_PARSER_MISC:
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
return;
case XML_PARSER_ENTITY_DECL:
case XML_PARSER_CONTENT:
case XML_PARSER_ATTRIBUTE_VALUE:
case XML_PARSER_PI:
case XML_PARSER_SYSTEM_LITERAL:
case XML_PARSER_PUBLIC_LITERAL:
/* we just ignore it there */
return;
case XML_PARSER_EPILOG:
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
return;
case XML_PARSER_ENTITY_VALUE:
/*
* NOTE: in the case of entity values, we don't do the
* substitution here since we need the literal
* entity value to be able to save the internal
* subset of the document.
* This will be handled by xmlStringDecodeEntities
*/
return;
case XML_PARSER_DTD:
/*
* [WFC: Well-Formedness Constraint: PEs in Internal Subset]
* In the internal DTD subset, parameter-entity references
* can occur only where markup declarations can occur, not
* within markup declarations.
* In that case this is handled in xmlParseMarkupDecl
*/
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
return;
if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
return;
break;
case XML_PARSER_IGNORE:
return;
}
NEXT;
name = xmlParseName(ctxt);
if (xmlParserDebugEntities)
xmlGenericError(xmlGenericErrorContext,
"PEReference: %s\n", name);
if (name == NULL) {
xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
} else {
if (RAW == ';') {
NEXT;
if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
if (ctxt->instate == XML_PARSER_EOF)
return;
if (entity == NULL) {
/*
* [ WFC: Entity Declared ]
* In a document without any DTD, a document with only an
* internal DTD subset which contains no parameter entity
* references, or a document with "standalone='yes'", ...
* ... The declaration of a parameter entity must precede
* any reference to it...
*/
if ((ctxt->standalone == 1) ||
((ctxt->hasExternalSubset == 0) &&
(ctxt->hasPErefs == 0))) {
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
"PEReference: %%%s; not found\n", name);
} else {
/*
* [ VC: Entity Declared ]
* In a document with an external subset or external
* parameter entities with "standalone='no'", ...
* ... The declaration of a parameter entity must precede
* any reference to it...
*/
if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
"PEReference: %%%s; not found\n",
name, NULL);
} else
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
"PEReference: %%%s; not found\n",
name, NULL);
ctxt->valid = 0;
}
xmlParserEntityCheck(ctxt, 0, NULL, 0);
} else if (ctxt->input->free != deallocblankswrapper) {
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
if (xmlPushInput(ctxt, input) < 0)
return;
} else {
if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
(entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
xmlChar start[4];
xmlCharEncoding enc;
/*
* Note: external parameter entities will not be loaded, it
* is not required for a non-validating parser, unless the
* option of validating, or substituting entities were
* given. Doing so is far more secure as the parser will
* only process data coming from the document entity by
* default.
*/
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
((ctxt->options & XML_PARSE_NOENT) == 0) &&
((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
(ctxt->replaceEntities == 0) &&
(ctxt->validate == 0))
return;
/*
* handle the extra spaces added before and after
* c.f. http://www.w3.org/TR/REC-xml#as-PE
* this is done independently.
*/
input = xmlNewEntityInputStream(ctxt, entity);
if (xmlPushInput(ctxt, input) < 0)
return;
/*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
* Note that, since we may have some non-UTF8
* encoding (like UTF16, bug 135229), the 'length'
* is not known, but we can calculate based upon
* the amount of data in the buffer.
*/
GROW
if (ctxt->instate == XML_PARSER_EOF)
return;
if ((ctxt->input->end - ctxt->input->cur)>=4) {
start[0] = RAW;
start[1] = NXT(1);
start[2] = NXT(2);
start[3] = NXT(3);
enc = xmlDetectCharEncoding(start, 4);
if (enc != XML_CHAR_ENCODING_NONE) {
xmlSwitchEncoding(ctxt, enc);
}
}
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
(IS_BLANK_CH(NXT(5)))) {
xmlParseTextDecl(ctxt);
}
} else {
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
"PEReference: %s is not a parameter entity\n",
name);
}
}
} else {
xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
}
}
}
/*
* Macro used to grow the current buffer.
* buffer##_size is expected to be a size_t
* mem_error: is expected to handle memory allocation failures
*/
#define growBuffer(buffer, n) { \
xmlChar *tmp; \
size_t new_size = buffer##_size * 2 + n; \
if (new_size < buffer##_size) goto mem_error; \
tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
if (tmp == NULL) goto mem_error; \
buffer = tmp; \
buffer##_size = new_size; \
}
/**
* xmlStringLenDecodeEntities:
* @ctxt: the parser context
* @str: the input string
* @len: the string length
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
* @end: an end marker xmlChar, 0 if none
* @end2: an end marker xmlChar, 0 if none
* @end3: an end marker xmlChar, 0 if none
*
* Takes a entity string content and process to do the adequate substitutions.
*
* [67] Reference ::= EntityRef | CharRef
*
* [69] PEReference ::= '%' Name ';'
*
* Returns A newly allocated string with the substitution done. The caller
* must deallocate it !
*/
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
int what, xmlChar end, xmlChar end2, xmlChar end3) {
xmlChar *buffer = NULL;
size_t buffer_size = 0;
size_t nbchars = 0;
xmlChar *current = NULL;
xmlChar *rep = NULL;
const xmlChar *last;
xmlEntityPtr ent;
int c,l;
if ((ctxt == NULL) || (str == NULL) || (len < 0))
return(NULL);
last = str + len;
if (((ctxt->depth > 40) &&
((ctxt->options & XML_PARSE_HUGE) == 0)) ||
(ctxt->depth > 1024)) {
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
return(NULL);
}
/*
* allocate a translation buffer.
*/
buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
if (buffer == NULL) goto mem_error;
/*
* OK loop until we reach one of the ending char or a size limit.
* we are operating on already parsed values.
*/
if (str < last)
c = CUR_SCHAR(str, l);
else
c = 0;
while ((c != 0) && (c != end) && /* non input consuming loop */
(c != end2) && (c != end3)) {
if (c == 0) break;
if ((c == '&') && (str[1] == '#')) {
int val = xmlParseStringCharRef(ctxt, &str);
if (val != 0) {
COPY_BUF(0,buffer,nbchars,val);
}
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
if (xmlParserDebugEntities)
xmlGenericError(xmlGenericErrorContext,
"String decoding Entity Reference: %.30s\n",
str);
ent = xmlParseStringEntityRef(ctxt, &str);
if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
(ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
goto int_error;
xmlParserEntityCheck(ctxt, 0, ent, 0);
if (ent != NULL)
ctxt->nbentities += ent->checked / 2;
if ((ent != NULL) &&
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
if (ent->content != NULL) {
COPY_BUF(0,buffer,nbchars,ent->content[0]);
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
} else {
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
"predefined entity has no content\n");
}
} else if ((ent != NULL) && (ent->content != NULL)) {
ctxt->depth++;
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
0, 0, 0);
ctxt->depth--;
if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
(ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
goto int_error;
if (rep != NULL) {
current = rep;
while (*current != 0) { /* non input consuming loop */
buffer[nbchars++] = *current++;
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
goto int_error;
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
}
xmlFree(rep);
rep = NULL;
}
} else if (ent != NULL) {
int i = xmlStrlen(ent->name);
const xmlChar *cur = ent->name;
buffer[nbchars++] = '&';
if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
}
for (;i > 0;i--)
buffer[nbchars++] = *cur++;
buffer[nbchars++] = ';';
}
} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
if (xmlParserDebugEntities)
xmlGenericError(xmlGenericErrorContext,
"String decoding PE Reference: %.30s\n", str);
ent = xmlParseStringPEReference(ctxt, &str);
if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
goto int_error;
xmlParserEntityCheck(ctxt, 0, ent, 0);
if (ent != NULL)
ctxt->nbentities += ent->checked / 2;
if (ent != NULL) {
if (ent->content == NULL) {
xmlLoadEntityContent(ctxt, ent);
}
ctxt->depth++;
rep = xmlStringDecodeEntities(ctxt, ent->content, what,
0, 0, 0);
ctxt->depth--;
if (rep != NULL) {
current = rep;
while (*current != 0) { /* non input consuming loop */
buffer[nbchars++] = *current++;
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
goto int_error;
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
}
xmlFree(rep);
rep = NULL;
}
}
} else {
COPY_BUF(l,buffer,nbchars,c);
str += l;
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
}
}
if (str < last)
c = CUR_SCHAR(str, l);
else
c = 0;
}
buffer[nbchars] = 0;
return(buffer);
mem_error:
xmlErrMemory(ctxt, NULL);
int_error:
if (rep != NULL)
xmlFree(rep);
if (buffer != NULL)
xmlFree(buffer);
return(NULL);
}
/**
* xmlStringDecodeEntities:
* @ctxt: the parser context
* @str: the input string
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
* @end: an end marker xmlChar, 0 if none
* @end2: an end marker xmlChar, 0 if none
* @end3: an end marker xmlChar, 0 if none
*
* Takes a entity string content and process to do the adequate substitutions.
*
* [67] Reference ::= EntityRef | CharRef
*
* [69] PEReference ::= '%' Name ';'
*
* Returns A newly allocated string with the substitution done. The caller
* must deallocate it !
*/
xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
xmlChar end, xmlChar end2, xmlChar end3) {
if ((ctxt == NULL) || (str == NULL)) return(NULL);
return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
end, end2, end3));
}
/************************************************************************
* *
* Commodity functions, cleanup needed ? *
* *
************************************************************************/
/**
* areBlanks:
* @ctxt: an XML parser context
* @str: a xmlChar *
* @len: the size of @str
* @blank_chars: we know the chars are blanks
*
* Is this a sequence of blank chars that one can ignore ?
*
* Returns 1 if ignorable 0 otherwise.
*/
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
int blank_chars) {
int i, ret;
xmlNodePtr lastChild;
/*
* Don't spend time trying to differentiate them, the same callback is
* used !
*/
if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
return(0);
/*
* Check for xml:space value.
*/
if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
(*(ctxt->space) == -2))
return(0);
/*
* Check that the string is made of blanks
*/
if (blank_chars == 0) {
for (i = 0;i < len;i++)
if (!(IS_BLANK_CH(str[i]))) return(0);
}
/*
* Look if the element is mixed content in the DTD if available
*/
if (ctxt->node == NULL) return(0);
if (ctxt->myDoc != NULL) {
ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
if (ret == 0) return(1);
if (ret == 1) return(0);
}
/*
* Otherwise, heuristic :-\
*/
if ((RAW != '<') && (RAW != 0xD)) return(0);
if ((ctxt->node->children == NULL) &&
(RAW == '<') && (NXT(1) == '/')) return(0);
lastChild = xmlGetLastChild(ctxt->node);
if (lastChild == NULL) {
if ((ctxt->