blob: 02da46fc3e9bf9e56e5d02ee52dfa23b3d07fc57 [file] [log] [blame]
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
* JS lexical scanner.
*/
#include "frontend/TokenStream.h"
#include "mozilla/PodOperations.h"
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include "jsapi.h"
#include "jsatom.h"
#include "jscntxt.h"
#include "jsexn.h"
#include "jsnum.h"
#include "jsopcode.h"
#include "jsscript.h"
#include "frontend/BytecodeCompiler.h"
#include "js/CharacterEncoding.h"
#include "vm/Keywords.h"
#include "vm/StringBuffer.h"
using namespace js;
using namespace js::frontend;
using namespace js::unicode;
using mozilla::PodAssign;
using mozilla::PodCopy;
using mozilla::PodZero;
/*
 * One row of the keyword table: a keyword's spelling together with the token
 * kind, opcode and version stored for it. FindKeyword() returns pointers to
 * these rows and checkForKeyword() interprets them.
 */
struct KeywordInfo {
const char *chars; /* C string with keyword text */
TokenKind tokentype; /* token kind checkForKeyword() reports for this keyword */
JSOp op; /* JSOp */
JSVersion version; /* JSVersion; compared against versionNumber() in checkForKeyword() */
};
/*
 * The keyword table. FOR_EACH_JAVASCRIPT_KEYWORD expands KEYWORD_INFO once per
 * keyword, producing one KeywordInfo initializer each. FindKeyword() indexes
 * this array with the indices supplied by the code included from jsautokw.h.
 */
static const KeywordInfo keywords[] = {
#define KEYWORD_INFO(keyword, name, type, op, version) \
{js_##keyword##_str, type, op, version},
FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_INFO)
#undef KEYWORD_INFO
};
/*
 * Returns a KeywordInfo for the specified characters, or NULL if the string is
 * not a keyword.
 *
 * |s| points at |length| characters; |length| must be non-zero (asserted).
 *
 * The decision logic is the code pulled in from jsautokw.h, which is driven by
 * the JSKW_* macros defined immediately before the #include:
 *   - JSKW_LENGTH() / JSKW_AT(column): the candidate's length and characters.
 *   - JSKW_GOT_MATCH(index): return keywords[index] with no further checks.
 *   - JSKW_TEST_GUESS(index): compare all of |s| against keywords[index].
 *   - JSKW_NO_MATCH(): return NULL.
 */
static const KeywordInfo *
FindKeyword(const jschar *s, size_t length)
{
    JS_ASSERT(length != 0);

    // Plain local: the |register| storage class is deprecated since C++11 and
    // is ill-formed from C++17 on, and it has no effect on code generation.
    size_t i;
    const KeywordInfo *kw;
    const char *chars;

#define JSKW_LENGTH() length
#define JSKW_AT(column) s[column]
#define JSKW_GOT_MATCH(index) i = (index); goto got_match;
#define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
#define JSKW_NO_MATCH() goto no_match;
#include "jsautokw.h"
#undef JSKW_NO_MATCH
#undef JSKW_TEST_GUESS
#undef JSKW_GOT_MATCH
#undef JSKW_AT
#undef JSKW_LENGTH

  got_match:
    return &keywords[i];

  test_guess:
    // Compare every character of |s| with the guessed keyword's text.
    kw = &keywords[i];
    chars = kw->chars;
    do {
        if (*s++ != (unsigned char)(*chars++))
            goto no_match;
    } while (--length != 0);
    return kw;

  no_match:
    return NULL;
}
/*
 * Returns true iff |str| is non-empty, its first character satisfies
 * IsIdentifierStart() and each remaining character satisfies
 * IsIdentifierPart().
 */
bool
frontend::IsIdentifier(JSLinearString *str)
{
    const jschar *units = str->chars();
    const size_t count = str->length();

    if (count == 0)
        return false;

    if (!IsIdentifierStart(units[0]))
        return false;

    for (size_t i = 1; i < count; i++) {
        if (!IsIdentifierPart(units[i]))
            return false;
    }
    return true;
}
/*
 * Returns true iff the characters of |str| spell a keyword known to
 * FindKeyword(). The empty string is never a keyword.
 */
bool
frontend::IsKeyword(JSLinearString *str)
{
    // FindKeyword() asserts a non-zero length, so answer for the empty string
    // here (IsIdentifier() special-cases it the same way).
    size_t length = str->length();
    if (length == 0)
        return false;
    return FindKeyword(str->chars(), length) != NULL;
}
/*
 * Start the line table for a source whose first line is numbered |ln|: one
 * entry for the first line (offset 0) followed by the MAX_PTR sentinel.
 */
TokenStream::SourceCoords::SourceCoords(JSContext *cx, uint32_t ln)
  : lineStartOffsets_(cx), initialLineNum_(ln), lastLineIndex_(0)
{
    // The two appends below cannot fail because |lineStartOffsets_| has
    // statically-allocated elements.
    JS_ASSERT(lineStartOffsets_.capacity() >= 2);
    (void)lineStartOffsets_.reserve(2);

    // Copying MAX_PTR into a local is actually necessary! Passing MAX_PTR
    // itself causes compile errors on GCC and clang. You could try declaring
    // this:
    //
    //   const uint32_t TokenStream::SourceCoords::MAX_PTR;
    //
    // which fixes the GCC/clang error, but causes bustage on Windows. Sigh.
    uint32_t sentinel = MAX_PTR;

    // The first line begins at buffer offset 0; the sentinel follows it.
    lineStartOffsets_.infallibleAppend(0);
    lineStartOffsets_.infallibleAppend(sentinel);
}
/*
 * Record that line |lineNum| starts at buffer offset |lineStartOffset|.
 *
 * The table always ends with a MAX_PTR sentinel. A line not seen before takes
 * over the sentinel's slot and a fresh sentinel is appended after it; a line
 * seen before (because its newline was ungotten and re-read) is only checked.
 */
JS_ALWAYS_INLINE void
TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset)
{
    uint32_t lineIndex = lineNumToIndex(lineNum);
    uint32_t sentinelIndex = lineStartOffsets_.length() - 1;

    JS_ASSERT(lineStartOffsets_[0] == 0 && lineStartOffsets_[sentinelIndex] == MAX_PTR);

    if (lineIndex == sentinelIndex) {
        // We haven't seen this newline before. Grow the table FIRST and only
        // then turn the old sentinel slot into the new line's entry. That way
        // a failed append (OOM) leaves the table untouched, sentinel included:
        // it'll just be like the newline wasn't present. I.e. the line numbers
        // will be wrong, but the table stays well-formed.
        if (lineStartOffsets_.append(MAX_PTR))
            lineStartOffsets_[lineIndex] = lineStartOffset;
    } else {
        // We have seen this newline before (and ungot it). Do nothing (other
        // than checking it hasn't mysteriously changed). An index beyond the
        // sentinel can only arise after an earlier append failed, in which
        // case there is no entry to compare against.
        JS_ASSERT(lineIndex > sentinelIndex ||
                  lineStartOffsets_[lineIndex] == lineStartOffset);
    }
}
/*
 * Extend this line table with the entries |other| has beyond it. Does nothing
 * when this table is already at least as long as |other|'s.
 *
 * Failure to allocate is ignored, but leaves this table exactly as it was, so
 * the trailing MAX_PTR sentinel is never lost.
 */
JS_ALWAYS_INLINE void
TokenStream::SourceCoords::fill(const TokenStream::SourceCoords &other)
{
    JS_ASSERT(lineStartOffsets_.back() == MAX_PTR);
    JS_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);

    size_t otherLength = other.lineStartOffsets_.length();
    if (lineStartOffsets_.length() >= otherLength)
        return;

    // Secure all the space up front. Overwriting the sentinel and then failing
    // part-way through the appends would leave the table without a sentinel.
    if (!lineStartOffsets_.reserve(otherLength))
        return;

    // Our sentinel slot becomes a real entry; everything after it (ending with
    // |other|'s sentinel) is copied across.
    uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
    lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];

    for (size_t i = sentinelIndex + 1; i < otherLength; i++)
        lineStartOffsets_.infallibleAppend(other.lineStartOffsets_[i]);
}
/*
 * Returns the index i into |lineStartOffsets_| of the line containing
 * |offset|, i.e. lineStartOffsets_[i] <= offset < lineStartOffsets_[i + 1].
 *
 * |lastLineIndex_| holds the result of the previous lookup and is used as the
 * starting point; it is updated by this call even though the method is const.
 */
JS_ALWAYS_INLINE uint32_t
TokenStream::SourceCoords::lineIndexOf(uint32_t offset) const
{
uint32_t iMin, iMax, iMid;
if (lineStartOffsets_[lastLineIndex_] <= offset) {
// If we reach here, offset is on a line the same as or higher than
// last time. Check first for the +0, +1, +2 cases, because they
// typically cover 85--98% of cases.
if (offset < lineStartOffsets_[lastLineIndex_ + 1])
return lastLineIndex_; // lineIndex is same as last time
// If we reach here, there must be at least one more entry (plus the
// sentinel). Try it.
lastLineIndex_++;
if (offset < lineStartOffsets_[lastLineIndex_ + 1])
return lastLineIndex_; // lineIndex is one higher than last time
// The same logic applies here.
lastLineIndex_++;
if (offset < lineStartOffsets_[lastLineIndex_ + 1]) {
return lastLineIndex_; // lineIndex is two higher than last time
}
// No luck. Oh well, we have a better-than-default starting point for
// the binary search.
iMin = lastLineIndex_ + 1;
JS_ASSERT(iMin < lineStartOffsets_.length() - 1); // -1 due to the sentinel
} else {
// offset is on an earlier line than last time: search from the start.
iMin = 0;
}
// This is a binary search with deferred detection of equality, which was
// marginally faster in this case than a standard binary search.
// The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we
// want one before that.
iMax = lineStartOffsets_.length() - 2;
while (iMax > iMin) {
iMid = (iMin + iMax) / 2;
if (offset >= lineStartOffsets_[iMid + 1])
iMin = iMid + 1; // offset is above lineStartOffsets_[iMid]
else
iMax = iMid; // offset is below or within lineStartOffsets_[iMid]
}
JS_ASSERT(iMax == iMin);
JS_ASSERT(lineStartOffsets_[iMin] <= offset && offset < lineStartOffsets_[iMin + 1]);
// Remember the answer as the starting point for the next lookup.
lastLineIndex_ = iMin;
return iMin;
}
uint32_t
TokenStream::SourceCoords::lineNum(uint32_t offset) const
{
uint32_t lineIndex = lineIndexOf(offset);
return lineIndexToNum(lineIndex);
}
uint32_t
TokenStream::SourceCoords::columnIndex(uint32_t offset) const
{
uint32_t lineIndex = lineIndexOf(offset);
uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
JS_ASSERT(offset >= lineStartOffset);
return offset - lineStartOffset;
}
void
TokenStream::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum,
uint32_t *columnIndex) const
{
uint32_t lineIndex = lineIndexOf(offset);
*lineNum = lineIndexToNum(lineIndex);
uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
JS_ASSERT(offset >= lineStartOffset);
*columnIndex = offset - lineStartOffset;
}
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4351)
#endif
/*
 * Initialize members that aren't initialized in |init|.
 *
 * The stream scans the |length| characters starting at |base|. |options|
 * supplies the starting line number and column, the filename, the version and
 * the principals; |smg| is stored as the strict-mode getter and |keepAtoms|
 * is used to construct |lastFunctionKeyword|.
 */
TokenStream::TokenStream(JSContext *cx, const CompileOptions &options,
const jschar *base, size_t length, StrictModeGetter *smg,
AutoKeepAtoms& keepAtoms)
: srcCoords(cx, options.lineno),
tokens(),
cursor(),
lookahead(),
lineno(options.lineno),
flags(),
linebase(base - options.column),
prevLinebase(NULL),
userbuf(cx, base - options.column, length + options.column), // See comment below
filename(options.filename),
sourceMap(NULL),
listenerTSData(),
tokenbuf(cx),
version(options.version),
cx(cx),
originPrincipals(JSScript::normalizeOriginPrincipals(options.principals,
options.originPrincipals)),
strictModeGetter(smg),
lastFunctionKeyword(keepAtoms),
tokenSkip(cx, &tokens),
linebaseSkip(cx, &linebase),
prevLinebaseSkip(cx, &prevLinebase)
{
// Column numbers are computed as offsets from the current line's base, so the
// initial line's base must be included in the buffer. linebase and userbuf
// were adjusted above, and if we are starting tokenization part way through
// this line then adjust the next character.
userbuf.setAddressOfNextRawChar(base);
// Paired with the JS_DropPrincipals() call in the destructor.
if (originPrincipals)
JS_HoldPrincipals(originPrincipals);
// If a source handler is installed in the runtime's debug hooks, hand it
// the source text being tokenized.
JSSourceHandler listener = cx->runtime()->debugHooks.sourceHandler;
void *listenerData = cx->runtime()->debugHooks.sourceHandlerData;
if (listener)
listener(options.filename, options.lineno, base, length, &listenerTSData, listenerData);
/*
* This table holds all the token kinds that satisfy these properties:
* - A single char long.
* - Cannot be a prefix of any longer token (e.g. '+' is excluded because
* '+=' is a valid token).
* - Doesn't need tp->t_op set (e.g. this excludes '~').
*
* The few token kinds satisfying these properties cover roughly 35--45%
* of the tokens seen in practice.
*
* Nb: oneCharTokens, maybeEOL and maybeStrSpecial could be static, but
* initializing them this way is a bit easier. Don't worry, the time to
* initialize them for each TokenStream is trivial. See bug 639420.
*/
memset(oneCharTokens, 0, sizeof(oneCharTokens));
oneCharTokens[unsigned(';')] = TOK_SEMI;
oneCharTokens[unsigned(',')] = TOK_COMMA;
oneCharTokens[unsigned('?')] = TOK_HOOK;
oneCharTokens[unsigned('[')] = TOK_LB;
oneCharTokens[unsigned(']')] = TOK_RB;
oneCharTokens[unsigned('{')] = TOK_LC;
oneCharTokens[unsigned('}')] = TOK_RC;
oneCharTokens[unsigned('(')] = TOK_LP;
oneCharTokens[unsigned(')')] = TOK_RP;
/* See getChar() for an explanation of maybeEOL[]. */
/* Both tables below are indexed by the low 8 bits of a character. */
memset(maybeEOL, 0, sizeof(maybeEOL));
maybeEOL[unsigned('\n')] = true;
maybeEOL[unsigned('\r')] = true;
maybeEOL[unsigned(LINE_SEPARATOR & 0xff)] = true;
maybeEOL[unsigned(PARA_SEPARATOR & 0xff)] = true;
/* See getTokenInternal() for an explanation of maybeStrSpecial[]. */
memset(maybeStrSpecial, 0, sizeof(maybeStrSpecial));
maybeStrSpecial[unsigned('"')] = true;
maybeStrSpecial[unsigned('\'')] = true;
maybeStrSpecial[unsigned('\\')] = true;
maybeStrSpecial[unsigned('\n')] = true;
maybeStrSpecial[unsigned('\r')] = true;
maybeStrSpecial[unsigned(LINE_SEPARATOR & 0xff)] = true;
maybeStrSpecial[unsigned(PARA_SEPARATOR & 0xff)] = true;
maybeStrSpecial[unsigned(EOF & 0xff)] = true;
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif
/*
 * Release what the stream owns: the source map URL buffer allocated by
 * getSourceMappingURL() and the hold taken on |originPrincipals| in the
 * constructor.
 */
TokenStream::~TokenStream()
{
if (sourceMap)
js_free(sourceMap);
if (originPrincipals)
JS_DropPrincipals(cx->runtime(), originPrincipals);
}
/*
 * Use the fastest available getc: getc_unlocked or _getc_nolock when the build
 * configuration defines the corresponding HAVE_* macro, plain getc otherwise.
 *
 * NOTE(review): no use of fast_getc is visible in this part of the file --
 * confirm it is still needed.
 */
#if defined(HAVE_GETC_UNLOCKED)
# define fast_getc getc_unlocked
#elif defined(HAVE__GETC_NOLOCK)
# define fast_getc _getc_nolock
#else
# define fast_getc getc
#endif
/*
 * Called once an end-of-line has been consumed: the next raw character becomes
 * the start of the current line, the old line start is kept in |prevLinebase|,
 * the line counter advances, and the new line start is recorded in
 * |srcCoords|.
 */
JS_ALWAYS_INLINE void
TokenStream::updateLineInfoForEOL()
{
    const jschar *newLineStart = userbuf.addressOfNextRawChar();

    prevLinebase = linebase;
    linebase = newLineStart;
    ++lineno;

    srcCoords.add(lineno, newLineStart - userbuf.base());
}
/* Clears TSF_DIRTYLINE and sets TSF_EOL in |flags|. */
JS_ALWAYS_INLINE void
TokenStream::updateFlagsForEOL()
{
    flags = (flags & ~TSF_DIRTYLINE) | TSF_EOL;
}
/*
 * This gets the next char, normalizing all EOL sequences to '\n' as it goes.
 * At the end of the buffer it sets TSF_EOF and returns EOF.
 */
int32_t
TokenStream::getChar()
{
    if (JS_UNLIKELY(!userbuf.hasRawChars())) {
        flags |= TSF_EOF;
        return EOF;
    }

    int32_t c = userbuf.getRawChar();

    /*
     * Normalize the jschar if it was a newline. We need to detect any of
     * these four characters: '\n' (0x000a), '\r' (0x000d),
     * LINE_SEPARATOR (0x2028), PARA_SEPARATOR (0x2029). Testing for each
     * one in turn is slow, so we use a single probabilistic check, and if
     * that succeeds, test for them individually.
     *
     * We use the bottom 8 bits to index into a lookup table, succeeding
     * when c&0xff is 0xa, 0xd, 0x28 or 0x29. Among ASCII chars (which
     * are by far the most common) this gives false positives for '('
     * (0x0028) and ')' (0x0029). We could avoid those by incorporating
     * the 13th bit of c into the lookup, but that requires extra shifting
     * and masking and isn't worthwhile. See TokenStream::TokenStream()
     * for the initialization of the relevant entries in the table.
     */
    if (JS_LIKELY(!maybeEOL[c & 0xff]))
        return c;

    bool sawEOL = false;
    if (c == '\n') {
        sawEOL = true;
    } else if (c == '\r') {
        /* if it's a \r\n sequence: treat as a single EOL, skip over the \n */
        if (userbuf.hasRawChars())
            userbuf.matchRawChar('\n');
        sawEOL = true;
    } else if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
        sawEOL = true;
    }

    /* A false positive of the table lookup: an ordinary character. */
    if (!sawEOL)
        return c;

    updateLineInfoForEOL();
    return '\n';
}
/*
* This gets the next char. It does nothing special with EOL sequences, not
* even updating the line counters. It can be used safely if (a) the
* resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if
* it's an EOL, and (b) the line-related state (lineno, linebase) is not used
* before it's ungotten.
*/
int32_t
TokenStream::getCharIgnoreEOL()
{
if (JS_LIKELY(userbuf.hasRawChars()))
return userbuf.getRawChar();
flags |= TSF_EOF;
return EOF;
}
/*
 * Undo a getChar() that returned |c|. EOF is ignored. For a normalized '\n'
 * the whole raw EOL sequence is put back and the line state (linebase,
 * lineno) is rewound by one line.
 */
void
TokenStream::ungetChar(int32_t c)
{
    if (c == EOF)
        return;

    JS_ASSERT(!userbuf.atStart());
    userbuf.ungetRawChar();

    if (c != '\n') {
        JS_ASSERT(userbuf.peekRawChar() == c);
        return;
    }

#ifdef DEBUG
    int32_t rawEOL = userbuf.peekRawChar();
    JS_ASSERT(TokenBuf::isRawEOLChar(rawEOL));
#endif

    /* if it's a \r\n sequence, also unget the \r */
    if (!userbuf.atStart())
        userbuf.matchRawCharBackwards('\r');

    JS_ASSERT(prevLinebase); /* we should never get more than one EOL char */
    linebase = prevLinebase;
    prevLinebase = NULL;
    lineno--;
}
/*
 * Counterpart of getCharIgnoreEOL(): steps back one raw character without
 * touching any line state. Does nothing when |c| is EOF.
 */
void
TokenStream::ungetCharIgnoreEOL(int32_t c)
{
    if (c != EOF) {
        JS_ASSERT(!userbuf.atStart());
        userbuf.ungetRawChar();
    }
}
/*
 * Return true iff |n| raw characters can be read from this without reading
 * past EOF or a '\n', and copy those characters into |cp| if so. The
 * characters are not consumed: use skipChars(n) to do so after checking that
 * the consumed characters had appropriate values.
 *
 * When false is returned, |cp| holds only the characters read before the
 * scan stopped.
 */
bool
TokenStream::peekChars(int n, jschar *cp)
{
    int copied = 0;
    while (copied < n) {
        int32_t ch = getCharIgnoreEOL();
        if (ch == EOF)
            break;
        if (ch == '\n') {
            ungetCharIgnoreEOL(ch);
            break;
        }
        cp[copied] = (jschar)ch;
        copied++;
    }

    /* Put back everything that was copied, last character first. */
    for (int back = copied - 1; back >= 0; back--)
        ungetCharIgnoreEOL(cp[back]);

    return copied == n;
}
/*
 * Scan forward from |p| and return where the scan stopped: at |limit_|, after
 * |max| non-EOL characters, or one past the first raw EOL character found,
 * whichever happens first.
 */
const jschar *
TokenStream::TokenBuf::findEOLMax(const jschar *p, size_t max)
{
    JS_ASSERT(base_ <= p && p <= limit_);

    for (size_t seen = 0; p < limit_ && seen < max; seen++) {
        /* Note that |p| is advanced past the EOL character before stopping. */
        if (TokenBuf::isRawEOLChar(*p++))
            break;
    }
    return p;
}
void
TokenStream::advance(size_t position)
{
const jschar *end = userbuf.base() + position;
while (userbuf.addressOfNextRawChar() < end)
getChar();
Token *cur = &tokens[cursor];
cur->pos.begin = userbuf.addressOfNextRawChar() - userbuf.base();
cur->type = TOK_ERROR;
lookahead = 0;
}
/*
 * Save the scanner state into |*pos|: the read position, flags, line state,
 * the current token and any lookahead tokens. seek() restores it.
 */
void
TokenStream::tell(Position *pos)
{
    pos->buf = userbuf.addressOfNextRawChar(/* allowPoisoned = */ true);
    pos->flags = flags;

    pos->lineno = lineno;
    pos->linebase = linebase;
    pos->prevLinebase = prevLinebase;

    pos->lookahead = lookahead;
    pos->currentToken = currentToken();
    for (unsigned ahead = 0; ahead < lookahead; ahead++) {
        size_t slot = (cursor + 1 + ahead) & ntokensMask;
        pos->lookaheadTokens[ahead] = tokens[slot];
    }
}
/*
 * Restore the scanner state previously captured by tell(): read position,
 * flags, line state, the current token and the lookahead tokens.
 */
void
TokenStream::seek(const Position &pos)
{
    userbuf.setAddressOfNextRawChar(pos.buf, /* allowPoisoned = */ true);
    flags = pos.flags;

    lineno = pos.lineno;
    linebase = pos.linebase;
    prevLinebase = pos.prevLinebase;

    lookahead = pos.lookahead;
    tokens[cursor] = pos.currentToken;
    for (unsigned ahead = 0; ahead < lookahead; ahead++) {
        size_t slot = (cursor + 1 + ahead) & ntokensMask;
        tokens[slot] = pos.lookaheadTokens[ahead];
    }
}
/*
 * Like seek(pos), but first brings over state from |other|: any line-start
 * entries |other| has that this stream lacks, and |other|'s
 * lastFunctionKeyword position.
 */
void
TokenStream::seek(const Position &pos, const TokenStream &other)
{
srcCoords.fill(other.srcCoords);
lastFunctionKeyword = other.lastFunctionKeyword;
seek(pos);
}
/*
 * Copy into |pos| the position saved by getTokenInternal() when it last
 * scanned a TOK_FUNCTION keyword. Asserts that such a position was recorded
 * past the start of the buffer.
 */
void
TokenStream::positionAfterLastFunctionKeyword(Position &pos)
{
JS_ASSERT(lastFunctionKeyword.buf > userbuf.base());
PodAssign(&pos, &lastFunctionKeyword);
}
/*
 * Report |errorNumber| at |offset| as a strict-mode diagnostic. In strict mode
 * code it is an error; otherwise it is a warning when the extra-warnings
 * option is on, and is silently accepted (returning true) when it is off.
 */
bool
TokenStream::reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
                                           va_list args)
{
    unsigned reportFlags = JSREPORT_STRICT;

    if (strictMode) {
        /* In strict mode code, this is an error, not merely a warning. */
        reportFlags |= JSREPORT_ERROR;
    } else {
        if (!cx->hasExtraWarningsOption())
            return true;
        reportFlags |= JSREPORT_WARNING;
    }

    return reportCompileErrorNumberVA(offset, reportFlags, errorNumber, args);
}
/*
 * Deliver this error: first try to turn it into a pending exception, and if
 * that does not happen, pass it to the debug error hook and the context's
 * error reporter.
 */
void
CompileError::throwError()
{
    /*
     * If there's a runtime exception type associated with this error
     * number, set that as the pending exception. For errors occurring at
     * compile time, this is very likely to be a JSEXN_SYNTAXERR.
     *
     * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
     * flag will be set in report.flags. Proper behavior for an error
     * reporter is to ignore a report with this flag for all but top-level
     * compilation errors. The exception will remain pending, and so long
     * as the non-top-level "load", "eval", or "compile" native function
     * returns false, the top-level reporter will eventually receive the
     * uncaught exception report.
     */
    if (js_ErrorToException(cx, message, &report, NULL, NULL))
        return;

    /*
     * If debugErrorHook is present then we give it a chance to veto
     * sending the error on to the regular error reporter.
     */
    JSDebugErrorHook hook = cx->runtime()->debugHooks.debugErrorHook;
    if (hook && !hook(cx, message, &report, cx->runtime()->debugHooks.debugErrorHookData))
        return;

    /* Report the error */
    if (cx->errorReporter)
        cx->errorReporter(cx, message, &report);
}
/*
 * Free everything the report owns -- the line buffers, the messages and the
 * message arguments -- and then zero the report.
 */
CompileError::~CompileError()
{
    js_free((void*)report.uclinebuf);
    js_free((void*)report.linebuf);
    js_free((void*)report.ucmessage);

    js_free(message);
    message = NULL;

    if (report.messageArgs) {
        /* The individual argument strings are freed only in the ASCII case. */
        if (argumentsType == ArgumentsAreASCII) {
            for (unsigned i = 0; report.messageArgs[i]; i++)
                js_free((void*)report.messageArgs[i]);
        }
        js_free(report.messageArgs);
    }

    PodZero(&report);
}
/*
 * Build a CompileError for |errorNumber| located at buffer offset |offset| and
 * deliver it through CompileError::throwError().
 *
 * |flags| are JSREPORT_* bits. A warning is turned into an error when
 * cx->hasWErrorOption() is set. |args| supplies the message arguments, which
 * are treated as Unicode when JSREPORT_UC is set and as ASCII otherwise.
 *
 * Returns false if expanding the message or building the source-line window
 * fails. Otherwise the result is true only if the report is (still) a warning.
 */
bool
TokenStream::reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
va_list args)
{
bool warning = JSREPORT_IS_WARNING(flags);
// With the werror option, warnings are reported as errors.
if (warning && cx->hasWErrorOption()) {
flags &= ~JSREPORT_WARNING;
warning = false;
}
// |err| owns the buffers attached to its report; its destructor frees them
// on every return path below.
CompileError err(cx);
err.report.flags = flags;
err.report.errorNumber = errorNumber;
err.report.filename = filename;
err.report.originPrincipals = originPrincipals;
err.report.lineno = srcCoords.lineNum(offset);
err.report.column = srcCoords.columnIndex(offset);
err.argumentsType = (flags & JSREPORT_UC) ? ArgumentsAreUnicode : ArgumentsAreASCII;
if (!js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL, errorNumber, &err.message,
&err.report, err.argumentsType, args))
{
return false;
}
/*
* Given a token, T, that we want to complain about: if T's (starting)
* lineno doesn't match TokenStream's lineno, that means we've scanned past
* the line that T starts on, which makes it hard to print some or all of
* T's (starting) line for context.
*
* So we don't even try, leaving report.linebuf and friends zeroed. This
* means that any error involving a multi-line token (e.g. an unterminated
* multi-line string literal) won't have a context printed.
*/
if (err.report.lineno == lineno) {
const jschar *tokenStart = userbuf.base() + offset;
// We show only a portion (a "window") of the line around the erroneous
// token -- the first char in the token, plus |windowRadius| chars
// before it and |windowRadius - 1| chars after it. This is because
// lines can be very long and printing the whole line is (a) not that
// helpful, and (b) can waste a lot of memory. See bug 634444.
static const size_t windowRadius = 60;
// Truncate at the front if necessary.
const jschar *windowBase = (linebase + windowRadius < tokenStart)
? tokenStart - windowRadius
: linebase;
uint32_t windowOffset = tokenStart - windowBase;
// Find EOL, or truncate at the back if necessary.
// NOTE(review): findEOLMax() as defined in this file returns a pointer one
// past an EOL character it finds, so the window may include that EOL
// character -- confirm this matches the "without final \n" intent below.
const jschar *windowLimit = userbuf.findEOLMax(tokenStart, windowRadius);
size_t windowLength = windowLimit - windowBase;
JS_ASSERT(windowLength <= windowRadius * 2);
// Create the windowed strings.
StringBuffer windowBuf(cx);
if (!windowBuf.append(windowBase, windowLength) || !windowBuf.append((jschar)0))
return false;
// Unicode and char versions of the window into the offending source
// line, without final \n.
err.report.uclinebuf = windowBuf.extractWellSized();
if (!err.report.uclinebuf)
return false;
TwoByteChars tbchars(err.report.uclinebuf, windowLength);
err.report.linebuf = LossyTwoByteCharsToNewLatin1CharsZ(cx, tbchars).c_str();
if (!err.report.linebuf)
return false;
// Both token pointers sit at the same offset into their respective windows.
err.report.tokenptr = err.report.linebuf + windowOffset;
err.report.uctokenptr = err.report.uclinebuf + windowOffset;
}
err.throwError();
return warning;
}
/*
 * Varargs front end for reportStrictModeErrorNumberVA(), reporting at the
 * start of the current token with the stream's current strict-mode state.
 * Returns that function's result.
 */
bool
TokenStream::reportStrictModeError(unsigned errorNumber, ...)
{
    va_list ap;
    va_start(ap, errorNumber);
    bool ok = reportStrictModeErrorNumberVA(currentToken().pos.begin, strictMode(),
                                            errorNumber, ap);
    va_end(ap);
    return ok;
}
/*
 * Report |errorNumber| as an error (JSREPORT_ERROR) at the start of the
 * current token. Returns the result of reportCompileErrorNumberVA().
 */
bool
TokenStream::reportError(unsigned errorNumber, ...)
{
    va_list ap;
    va_start(ap, errorNumber);
    bool ok = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_ERROR, errorNumber,
                                         ap);
    va_end(ap);
    return ok;
}
/*
 * Report |errorNumber| as a warning (JSREPORT_WARNING) at the start of the
 * current token. Returns the result of reportCompileErrorNumberVA().
 */
bool
TokenStream::reportWarning(unsigned errorNumber, ...)
{
    va_list ap;
    va_start(ap, errorNumber);
    bool ok = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_WARNING,
                                         errorNumber, ap);
    va_end(ap);
    return ok;
}
/*
 * Report |errorNumber| at |offset| as a strict warning, but only when the
 * extra-warnings option is enabled; otherwise do nothing and return true.
 */
bool
TokenStream::reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, va_list args)
{
    if (cx->hasExtraWarningsOption())
        return reportCompileErrorNumberVA(offset, JSREPORT_STRICT|JSREPORT_WARNING, errorNumber, args);
    return true;
}
/*
 * Report |errorNumber| at |offset| with JSREPORT_WARNING. The result of the
 * underlying report call is discarded.
 */
void
TokenStream::reportAsmJSError(uint32_t offset, unsigned errorNumber, ...)
{
    va_list ap;
    va_start(ap, errorNumber);
    reportCompileErrorNumberVA(offset, JSREPORT_WARNING, errorNumber, ap);
    va_end(ap);
}
/*
 * We have encountered a '\': check for a Unicode escape sequence ('u' followed
 * by four hex digits) after it. Return 'true' and store the character code
 * value in |*result| if we found one. Otherwise, return 'false' and leave
 * |*result| alone. In both cases, do not advance along the buffer.
 */
bool
TokenStream::peekUnicodeEscape(int *result)
{
    jschar cp[5];

    if (!peekChars(5, cp) || cp[0] != 'u')
        return false;

    for (int i = 1; i <= 4; i++) {
        if (!JS7_ISHEX(cp[i]))
            return false;
    }

    /* Fold the four hex digits, most significant first. */
    int value = 0;
    for (int i = 1; i <= 4; i++)
        value = (value << 4) + JS7_UNHEX(cp[i]);

    *result = value;
    return true;
}
/*
 * If a Unicode escape follows and denotes an identifier-start character,
 * consume its five characters and return true with the code in |*cp|.
 * Otherwise consume nothing and return false.
 */
bool
TokenStream::matchUnicodeEscapeIdStart(int32_t *cp)
{
    if (!peekUnicodeEscape(cp) || !IsIdentifierStart(*cp))
        return false;

    skipChars(5);
    return true;
}
/*
 * If a Unicode escape follows and denotes an identifier-part character,
 * consume its five characters and return true with the code in |*cp|.
 * Otherwise consume nothing and return false.
 */
bool
TokenStream::matchUnicodeEscapeIdent(int32_t *cp)
{
    if (!peekUnicodeEscape(cp) || !IsIdentifierPart(*cp))
        return false;

    skipChars(5);
    return true;
}
/*
 * Helper function which returns true if the first length(q) characters in p
 * are the same as the characters in q. |q| is NUL-terminated; the caller must
 * make sure |p| has at least that many characters.
 */
static bool
CharsMatch(const jschar *p, const char *q) {
    for (; *q; ++p, ++q) {
        if (*p != *q)
            return false;
    }
    return true;
}
/*
 * Called inside a comment: if the upcoming characters are
 * " sourceMappingURL=", consume them together with the URL that follows and
 * store a NUL-terminated copy of the URL in |sourceMap| (replacing any
 * previous one).
 *
 * |isMultiline| says whether the enclosing comment is a multiline comment, in
 * which case its terminator also ends the URL. |shouldWarnDeprecated| requests
 * a JSMSG_DEPRECATED_SOURCE_MAP warning when a URL directive is found.
 *
 * Returns false if the warning call returns false or on allocation failure;
 * returns true otherwise, including when no directive or no URL is present.
 */
bool
TokenStream::getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated)
{
    /* Match comments of the form "//# sourceMappingURL=<url>" or
     * "/\* //# sourceMappingURL=<url> *\/"
     *
     * To avoid a crashing bug in IE, several JavaScript transpilers wrap single
     * line comments containing a source mapping URL inside a multiline
     * comment. To avoid potentially expensive lookahead and backtracking, we
     * only check for this case if we encounter a '#' character.
     */
    jschar peeked[18];
    int32_t c;

    if (!peekChars(18, peeked) || !CharsMatch(peeked, " sourceMappingURL="))
        return true;

    if (shouldWarnDeprecated && !reportWarning(JSMSG_DEPRECATED_SOURCE_MAP))
        return false;

    skipChars(18);
    tokenbuf.clear();

    while ((c = peekChar()) && c != EOF && !IsSpaceOrBOM2(c)) {
        getChar();
        /*
         * Source mapping URLs can occur in both single- and multiline
         * comments. If we're currently inside a multiline comment, we also
         * need to recognize multiline comment terminators.
         */
        if (isMultiline && c == '*' && peekChar() == '/') {
            ungetChar('*');
            break;
        }
        /*
         * A failed append must not be ignored: it would silently record a
         * truncated URL. Fail like the allocation below does.
         */
        if (!tokenbuf.append(c))
            return false;
    }

    if (tokenbuf.empty()) {
        /* The source map's URL was missing, but not quite an exception that
         * we should stop and drop everything for, though. */
        return true;
    }

    size_t sourceMapLength = tokenbuf.length();

    if (sourceMap)
        js_free(sourceMap);
    sourceMap = cx->pod_malloc<jschar>(sourceMapLength + 1);
    if (!sourceMap)
        return false;

    PodCopy(sourceMap, tokenbuf.begin(), sourceMapLength);
    sourceMap[sourceMapLength] = '\0';
    return true;
}
/*
 * Advance |cursor| to the next slot of the token ring and return that slot
 * with its begin offset set to the current read position plus |adjust|.
 */
Token *
TokenStream::newToken(ptrdiff_t adjust)
{
    cursor = (cursor + 1) & ntokensMask;

    Token &tok = tokens[cursor];
    tok.pos.begin = userbuf.addressOfNextRawChar() + adjust - userbuf.base();

    // NOTE: tok.pos.end is not set until the very end of getTokenInternal().
    MOZ_MAKE_MEM_UNDEFINED(&tok.pos.end, sizeof(tok.pos.end));
    return &tok;
}
/* Atomize the characters currently held in |cb|; returns AtomizeChars()'s result. */
JS_ALWAYS_INLINE JSAtom *
TokenStream::atomize(JSContext *cx, CharBuffer &cb)
{
    JSAtom *atom = AtomizeChars<CanGC>(cx, cb.begin(), cb.length());
    return atom;
}
#ifdef DEBUG
/*
 * Debug-only sanity check: the token's kind is a real token kind and its end
 * offset is not before its begin offset.
 */
bool
IsTokenSane(Token *tp)
{
    /*
     * Nb: TOK_EOL should never be used in an actual Token; it should only be
     * returned as a TokenKind from peekTokenSameLine().
     */
    const bool kindInRange = tp->type >= TOK_ERROR && tp->type < TOK_LIMIT;
    if (!kindInRange || tp->type == TOK_EOL)
        return false;

    return tp->pos.begin <= tp->pos.end;
}
#endif
bool
TokenStream::putIdentInTokenbuf(const jschar *identStart)
{
int32_t c, qc;
const jschar *tmp = userbuf.addressOfNextRawChar();
userbuf.setAddressOfNextRawChar(identStart);
tokenbuf.clear();
for (;;) {
c = getCharIgnoreEOL();
if (!IsIdentifierPart(c)) {
if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
break;
c = qc;
}
if (!tokenbuf.append(c)) {
userbuf.setAddressOfNextRawChar(tmp);
return false;
}
}
userbuf.setAddressOfNextRawChar(tmp);
return true;
}
bool
TokenStream::checkForKeyword(const jschar *s, size_t length, TokenKind *ttp, JSOp *topp)
{
JS_ASSERT(!ttp == !topp);
const KeywordInfo *kw = FindKeyword(s, length);
if (!kw)
return true;
if (kw->tokentype == TOK_RESERVED)
return reportError(JSMSG_RESERVED_ID, kw->chars);
if (kw->tokentype != TOK_STRICT_RESERVED) {
if (kw->version <= versionNumber()) {
/* Working keyword. */
if (ttp) {
*ttp = kw->tokentype;
*topp = (JSOp) kw->op;
return true;
}
return reportError(JSMSG_RESERVED_ID, kw->chars);
}
/*
* The keyword is not in this version. Treat it as an identifier,
* unless it is let or yield which we treat as TOK_STRICT_RESERVED by
* falling through to the code below (ES5 forbids them in strict mode).
*/
if (kw->tokentype != TOK_LET && kw->tokentype != TOK_YIELD)
return true;
}
/* Strict reserved word. */
return reportStrictModeError(JSMSG_RESERVED_ID, kw->chars);
}
/*
 * Classification of a token's first character. getTokenInternal() looks the
 * kind up in firstCharKinds[] (where it is stored as a uint8_t) and uses it to
 * choose a scanning path. The order of the enumerators matters: see the note
 * before Space.
 */
enum FirstCharKind {
Other,
OneChar,
Ident,
Dot,
Equals,
String,
Dec,
Colon,
Plus,
HexOct,
/* These two must be last, so that |c >= Space| matches both. */
Space,
EOL
};
/* |_______| is a visually quiet spelling of Other, used only in the table below. */
#define _______ Other
/*
* Table mapping each character code 0..127 to its FirstCharKind. Entries not
* listed here are Other.
*
* OneChar: 40, 41, 44, 59, 63, 91, 93, 123, 125: '(', ')', ',', ';', '?', '[', ']', '{', '}'
* Ident: 36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
* Dot: 46: '.'
* Equals: 61: '='
* String: 34, 39: '"', '\''
* Dec: 49..57: '1'..'9'
* Colon: 58: ':'
* Plus: 43: '+'
* HexOct: 48: '0'
* Space: 9, 11, 12, 32: '\t', '\v', '\f', ' '
* EOL: 10, 13: '\n', '\r'
*/
static const uint8_t firstCharKinds[] = {
/* 0 1 2 3 4 5 6 7 8 9 */
/* 0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, Space,
/* 10+ */ EOL, Space, Space, EOL, _______, _______, _______, _______, _______, _______,
/* 20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
/* 30+ */ _______, _______, Space, _______, String, _______, Ident, _______, _______, String,
/* 40+ */ OneChar, OneChar, _______, Plus, OneChar, _______, Dot, _______, HexOct, Dec,
/* 50+ */ Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Colon, OneChar,
/* 60+ */ _______, Equals, _______, OneChar, _______, Ident, Ident, Ident, Ident, Ident,
/* 70+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
/* 80+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
/* 90+ */ Ident, OneChar, _______, OneChar, _______, Ident, _______, Ident, Ident, Ident,
/* 100+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
/* 110+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
/* 120+ */ Ident, Ident, Ident, OneChar, _______, OneChar, _______, _______
};
#undef _______
TokenKind
TokenStream::getTokenInternal()
{
TokenKind tt;
int c, qc;
Token *tp;
FirstCharKind c1kind;
const jschar *numStart;
bool hasExp;
DecimalPoint decimalPoint;
const jschar *identStart;
bool hadUnicodeEscape;
retry:
if (JS_UNLIKELY(!userbuf.hasRawChars())) {
tp = newToken(0);
tt = TOK_EOF;
flags |= TSF_EOF;
goto out;
}
c = userbuf.getRawChar();
JS_ASSERT(c != EOF);
/*
* Chars not in the range 0..127 are rare. Getting them out of the way
* early allows subsequent checking to be faster.
*/
if (JS_UNLIKELY(c >= 128)) {
if (IsSpaceOrBOM2(c)) {
if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
updateLineInfoForEOL();
updateFlagsForEOL();
}
goto retry;
}
tp = newToken(-1);
/* '$' and '_' don't pass IsLetter, but they're < 128 so never appear here. */
JS_STATIC_ASSERT('$' < 128 && '_' < 128);
if (IsLetter(c)) {
identStart = userbuf.addressOfNextRawChar() - 1;
hadUnicodeEscape = false;
goto identifier;
}
goto badchar;
}
/*
* Get the token kind, based on the first char. The ordering of c1kind
* comparison is based on the frequency of tokens in real code. Minified
* and non-minified code have different characteristics, mostly in that
* whitespace occurs much less in minified code. Token kinds that fall in
* the 'Other' category typically account for less than 2% of all tokens,
* so their order doesn't matter much.
*/
c1kind = FirstCharKind(firstCharKinds[c]);
/*
* Skip over whitespace chars; update line state on EOLs. Even though
* whitespace isn't very common in minified code we have to handle it first
* (and jump back to 'retry') before calling newToken().
*/
if (c1kind >= Space) {
if (c1kind == EOL) {
/* If it's a \r\n sequence: treat as a single EOL, skip over the \n. */
if (c == '\r' && userbuf.hasRawChars())
userbuf.matchRawChar('\n');
updateLineInfoForEOL();
updateFlagsForEOL();
}
goto retry;
}
tp = newToken(-1);
/*
* Look for an unambiguous single-char token.
*/
if (c1kind == OneChar) {
tt = (TokenKind)oneCharTokens[c];
goto out;
}
/*
* Look for an identifier.
*/
if (c1kind == Ident) {
identStart = userbuf.addressOfNextRawChar() - 1;
hadUnicodeEscape = false;
identifier:
for (;;) {
c = getCharIgnoreEOL();
if (c == EOF)
break;
if (!IsIdentifierPart(c)) {
if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
break;
hadUnicodeEscape = true;
}
}
ungetCharIgnoreEOL(c);
/* Convert the escapes by putting into tokenbuf. */
if (hadUnicodeEscape && !putIdentInTokenbuf(identStart))
goto error;
/* Check for keywords unless parser asks us to ignore keywords. */
if (!(flags & TSF_KEYWORD_IS_NAME)) {
const jschar *chars;
size_t length;
if (hadUnicodeEscape) {
chars = tokenbuf.begin();
length = tokenbuf.length();
} else {
chars = identStart;
length = userbuf.addressOfNextRawChar() - identStart;
}
tt = TOK_NAME;
if (!checkForKeyword(chars, length, &tt, &tp->t_op))
goto error;
if (tt != TOK_NAME) {
if (tt == TOK_FUNCTION)
tell(&lastFunctionKeyword);
goto out;
}
}
/*
* Identifiers containing no Unicode escapes can be atomized directly
* from userbuf. The rest must use the escapes converted via
* tokenbuf before atomizing.
*/
JSAtom *atom;
if (!hadUnicodeEscape)
atom = AtomizeChars<CanGC>(cx, identStart, userbuf.addressOfNextRawChar() - identStart);
else
atom = atomize(cx, tokenbuf);
if (!atom)
goto error;
tp->setName(JSOP_NAME, atom->asPropertyName());
tt = TOK_NAME;
goto out;
}
if (c1kind == Dot) {
c = getCharIgnoreEOL();
if (JS7_ISDEC(c)) {
numStart = userbuf.addressOfNextRawChar() - 2;
decimalPoint = HasDecimal;
hasExp = false;
goto decimal_dot;
}
if (c == '.') {
qc = getCharIgnoreEOL();
if (qc == '.') {
tt = TOK_TRIPLEDOT;
goto out;
}
ungetCharIgnoreEOL(qc);
}
ungetCharIgnoreEOL(c);
tt = TOK_DOT;
goto out;
}
if (c1kind == Equals) {
if (matchChar('=')) {
if (matchChar('=')) {
tp->t_op = JSOP_STRICTEQ;
tt = TOK_STRICTEQ;
} else {
tp->t_op = JSOP_EQ;
tt = TOK_EQ;
}
} else if (matchChar('>')) {
tt = TOK_ARROW;
} else {
tp->t_op = JSOP_NOP;
tt = TOK_ASSIGN;
}
goto out;
}
/*
* Look for a string.
*/
if (c1kind == String) {
qc = c;
tokenbuf.clear();
while (true) {
/*
* We need to detect any of these chars: " or ', \n (or its
* equivalents), \\, EOF. We use maybeStrSpecial[] in a manner
* similar to maybeEOL[], see above. Because we detect EOL
* sequences here and put them back immediately, we can use
* getCharIgnoreEOL().
*/
c = getCharIgnoreEOL();
if (maybeStrSpecial[c & 0xff]) {
if (c == qc)
break;
if (c == '\\') {
switch (c = getChar()) {
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
case 'v': c = '\v'; break;
default:
if ('0' <= c && c < '8') {
int32_t val = JS7_UNDEC(c);
c = peekChar();
/* Strict mode code allows only \0, then a non-digit. */
if (val != 0 || JS7_ISDEC(c)) {
if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
goto error;
flags |= TSF_OCTAL_CHAR;
}
if ('0' <= c && c < '8') {
val = 8 * val + JS7_UNDEC(c);
getChar();
c = peekChar();
if ('0' <= c && c < '8') {
int32_t save = val;
val = 8 * val + JS7_UNDEC(c);
if (val <= 0377)
getChar();
else
val = save;
}
}
c = (jschar)val;
} else if (c == 'u') {
jschar cp[4];
if (peekChars(4, cp) &&
JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
c = (((((JS7_UNHEX(cp[0]) << 4)
+ JS7_UNHEX(cp[1])) << 4)
+ JS7_UNHEX(cp[2])) << 4)
+ JS7_UNHEX(cp[3]);
skipChars(4);
} else {
reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
goto error;
}
} else if (c == 'x') {
jschar cp[2];
if (peekChars(2, cp) &&
JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
skipChars(2);
} else {
reportError(JSMSG_MALFORMED_ESCAPE, "hexadecimal");
goto error;
}
} else if (c == '\n') {
/*
* ES5 7.8.4: an escaped line terminator represents
* no character.
*/
continue;
}
break;
}
} else if (TokenBuf::isRawEOLChar(c) || c == EOF) {
ungetCharIgnoreEOL(c);
reportError(JSMSG_UNTERMINATED_STRING);
goto error;
}
}
if (!tokenbuf.append(c))
goto error;
}
JSAtom *atom = atomize(cx, tokenbuf);
if (!atom)
goto error;
tp->setAtom(JSOP_STRING, atom);
tt = TOK_STRING;
goto out;
}
/*
* Look for a decimal number.
*/
if (c1kind == Dec) {
numStart = userbuf.addressOfNextRawChar() - 1;
decimal:
decimalPoint = NoDecimal;
hasExp = false;
while (JS7_ISDEC(c))
c = getCharIgnoreEOL();
if (c == '.') {
decimalPoint = HasDecimal;
decimal_dot:
do {
c = getCharIgnoreEOL();
} while (JS7_ISDEC(c));
}
if (c == 'e' || c == 'E') {
hasExp = true;
c = getCharIgnoreEOL();
if (c == '+' || c == '-')
c = getCharIgnoreEOL();
if (!JS7_ISDEC(c)) {
ungetCharIgnoreEOL(c);
reportError(JSMSG_MISSING_EXPONENT);
goto error;
}
do {
c = getCharIgnoreEOL();
} while (JS7_ISDEC(c));
}
ungetCharIgnoreEOL(c);
if (c != EOF && IsIdentifierStart(c)) {
reportError(JSMSG_IDSTART_AFTER_NUMBER);
goto error;
}
/*
* Unlike identifiers and strings, numbers cannot contain escaped
* chars, so we don't need to use tokenbuf. Instead we can just
* convert the jschars in userbuf directly to the numeric value.
*/
double dval;
const jschar *dummy;
if (!((decimalPoint == HasDecimal) || hasExp)) {
if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), 10, &dummy, &dval))
goto error;
} else {
if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))
goto error;
}
tp->setNumber(dval, decimalPoint);
tt = TOK_NUMBER;
goto out;
}
if (c1kind == Colon) {
tp->t_op = JSOP_NOP;
tt = TOK_COLON;
goto out;
}
if (c1kind == Plus) {
if (matchChar('=')) {
tp->t_op = JSOP_ADD;
tt = TOK_ADDASSIGN;
} else if (matchChar('+')) {
tt = TOK_INC;
} else {
tp->t_op = JSOP_POS;
tt = TOK_PLUS;
}
goto out;
}
/*
* Look for a hexadecimal or octal number.
*/
if (c1kind == HexOct) {
int radix;
c = getCharIgnoreEOL();
if (c == 'x' || c == 'X') {
radix = 16;
c = getCharIgnoreEOL();
if (!JS7_ISHEX(c)) {
ungetCharIgnoreEOL(c);
reportError(JSMSG_MISSING_HEXDIGITS);
goto error;
}
numStart = userbuf.addressOfNextRawChar() - 1; /* one past the '0x' */
while (JS7_ISHEX(c))
c = getCharIgnoreEOL();
} else if (JS7_ISDEC(c)) {
radix = 8;
numStart = userbuf.addressOfNextRawChar() - 1; /* one past the '0' */
while (JS7_ISDEC(c)) {
/* Octal integer literals are not permitted in strict mode code. */
if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
goto error;
/*
* Outside strict mode, we permit 08 and 09 as decimal numbers,
* which makes our behaviour a superset of the ECMA numeric
* grammar. We might not always be so permissive, so we warn
* about it.
*/
if (c >= '8') {
if (!reportWarning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
goto error;
}
goto decimal; /* use the decimal scanner for the rest of the number */
}
c = getCharIgnoreEOL();
}
} else {
/* '0' not followed by 'x', 'X' or a digit; scan as a decimal number. */
numStart = userbuf.addressOfNextRawChar() - 1;
goto decimal;
}
ungetCharIgnoreEOL(c);
if (c != EOF && IsIdentifierStart(c)) {
reportError(JSMSG_IDSTART_AFTER_NUMBER);
goto error;
}
double dval;
const jschar *dummy;
if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
goto error;
tp->setNumber(dval, NoDecimal);
tt = TOK_NUMBER;
goto out;
}
/*
* This handles everything else.
*/
JS_ASSERT(c1kind == Other);
switch (c) {
case '\\':
hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
if (hadUnicodeEscape) {
identStart = userbuf.addressOfNextRawChar() - 6;
goto identifier;
}
goto badchar;
case '|':
if (matchChar(c)) {
tt = TOK_OR;
} else if (matchChar('=')) {
tp->t_op = JSOP_BITOR;
tt = TOK_BITORASSIGN;
} else {
tt = TOK_BITOR;
}
break;
case '^':
if (matchChar('=')) {
tp->t_op = JSOP_BITXOR;
tt = TOK_BITXORASSIGN;
} else {
tt = TOK_BITXOR;
}
break;
case '&':
if (matchChar('&')) {
tt = TOK_AND;
} else if (matchChar('=')) {
tp->t_op = JSOP_BITAND;
tt = TOK_BITANDASSIGN;
} else {
tt = TOK_BITAND;
}
break;
case '!':
if (matchChar('=')) {
if (matchChar('=')) {
tp->t_op = JSOP_STRICTNE;
tt = TOK_STRICTNE;
} else {
tp->t_op = JSOP_NE;
tt = TOK_NE;
}
} else {
tp->t_op = JSOP_NOT;
tt = TOK_NOT;
}
break;
case '<':
/* NB: treat HTML begin-comment as comment-till-end-of-line */
if (matchChar('!')) {
if (matchChar('-')) {
if (matchChar('-')) {
flags |= TSF_IN_HTML_COMMENT;
goto skipline;
}
ungetChar('-');
}
ungetChar('!');
}
if (matchChar('<')) {
tp->t_op = JSOP_LSH;
tt = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;
} else {
if (matchChar('=')) {
tp->t_op = JSOP_LE;
tt = TOK_LE;
} else {
tp->t_op = JSOP_LT;
tt = TOK_LT;
}
}
break;
case '>':
if (matchChar('>')) {
if (matchChar('>')) {
tp->t_op = JSOP_URSH;
tt = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;
} else {
tp->t_op = JSOP_RSH;
tt = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;
}
} else {
if (matchChar('=')) {
tp->t_op = JSOP_GE;
tt = TOK_GE;
} else {
tp->t_op = JSOP_GT;
tt = TOK_GT;
}
}
break;
case '*':
tp->t_op = JSOP_MUL;
tt = matchChar('=') ? TOK_MULASSIGN : TOK_STAR;
break;
case '/':
/*
* Look for a single-line comment.
*/
if (matchChar('/')) {
c = peekChar();
if (c == '@' || c == '#') {
if (!getSourceMappingURL(false, getChar() == '@'))
goto error;
}
skipline:
/* Optimize line skipping if we are not in an HTML comment. */
if (flags & TSF_IN_HTML_COMMENT) {
while ((c = getChar()) != EOF && c != '\n') {
if (c == '-' && matchChar('-') && matchChar('>'))
flags &= ~TSF_IN_HTML_COMMENT;
}
} else {
while ((c = getChar()) != EOF && c != '\n')
continue;
}
ungetChar(c);
cursor = (cursor - 1) & ntokensMask;
goto retry;
}
/*
* Look for a multi-line comment.
*/
if (matchChar('*')) {
unsigned linenoBefore = lineno;
while ((c = getChar()) != EOF &&
!(c == '*' && matchChar('/'))) {
if (c == '@' || c == '#') {
if (!getSourceMappingURL(true, c == '@'))
goto error;
}
}
if (c == EOF) {
reportError(JSMSG_UNTERMINATED_COMMENT);
goto error;
}
if (linenoBefore != lineno)
updateFlagsForEOL();
cursor = (cursor - 1) & ntokensMask;
goto retry;
}
/*
* Look for a regexp.
*/
if (flags & TSF_OPERAND) {
tokenbuf.clear();
bool inCharClass = false;
for (;;) {
c = getChar();
if (c == '\\') {
if (!tokenbuf.append(c))
goto error;
c = getChar();
} else if (c == '[') {
inCharClass = true;
} else if (c == ']') {
inCharClass = false;
} else if (c == '/' && !inCharClass) {
/* For compat with IE, allow unescaped / in char classes. */
break;
}
if (c == '\n' || c == EOF) {
ungetChar(c);
reportError(JSMSG_UNTERMINATED_REGEXP);
goto error;
}
if (!tokenbuf.append(c))
goto error;
}
RegExpFlag reflags = NoFlags;
unsigned length = tokenbuf.length() + 1;
while (true) {
c = peekChar();
if (c == 'g' && !(reflags & GlobalFlag))
reflags = RegExpFlag(reflags | GlobalFlag);
else if (c == 'i' && !(reflags & IgnoreCaseFlag))
reflags = RegExpFlag(reflags | IgnoreCaseFlag);
else if (c == 'm' && !(reflags & MultilineFlag))
reflags = RegExpFlag(reflags | MultilineFlag);
else if (c == 'y' && !(reflags & StickyFlag))
reflags = RegExpFlag(reflags | StickyFlag);
else
break;
getChar();
length++;
}
c = peekChar();
if (JS7_ISLET(c)) {
char buf[2] = { '\0', '\0' };
tp->pos.begin += length + 1;
buf[0] = char(c);
reportError(JSMSG_BAD_REGEXP_FLAG, buf);
(void) getChar();
goto error;
}
tp->setRegExpFlags(reflags);
tt = TOK_REGEXP;
break;
}
tp->t_op = JSOP_DIV;
tt = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;
break;
case '%':
tp->t_op = JSOP_MOD;
tt = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;
break;
case '~':
tp->t_op = JSOP_BITNOT;
tt = TOK_BITNOT;
break;
case '-':
if (matchChar('=')) {
tp->t_op = JSOP_SUB;
tt = TOK_SUBASSIGN;
} else if (matchChar(c)) {
if (peekChar() == '>' && !(flags & TSF_DIRTYLINE)) {
flags &= ~TSF_IN_HTML_COMMENT;
goto skipline;
}
tt = TOK_DEC;
} else {
tp->t_op = JSOP_NEG;
tt = TOK_MINUS;
}
break;
badchar:
default:
reportError(JSMSG_ILLEGAL_CHARACTER);
goto error;
}
out:
flags |= TSF_DIRTYLINE;
tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base();
tp->type = tt;
JS_ASSERT(IsTokenSane(tp));
return tt;
error:
flags |= TSF_DIRTYLINE;
tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base();
tp->type = TOK_ERROR;
JS_ASSERT(IsTokenSane(tp));
onError();
return TOK_ERROR;
}
void
TokenStream::onError()
{
    /* Remember on the stream itself that an error has been reported. */
    flags = flags | TSF_HAD_ERROR;

#ifdef DEBUG
    /*
     * Debug builds poison userbuf at this point to enforce an invariant:
     * after an erroneous token has been produced, userbuf is never consulted
     * again. The invariant holds because the parser reacts to TOK_ERROR in
     * one of two ways:
     *
     *   (a) it aborts parsing immediately; or
     *   (b) TOK_ERROR is not the token it expected, so it ungets the token,
     *       and the following getToken() call returns that same TOK_ERROR
     *       straight from the lookahead buffer without touching userbuf.
     */
    userbuf.poison();
#endif
}
/*
 * Read one line from |file| into |buf|, storing at most |size| - 1 characters
 * followed by a terminating '\0'.
 *
 * A line ends at:
 *   - a '\n', which is kept in the buffer;
 *   - a '\r' that is not followed by '\n': the '\r' is kept and the character
 *     after it is pushed back onto |file| for the next read;
 *   - EOF, or when the buffer is full.
 *
 * Returns the number of characters stored (not counting the '\0'), or -1 if
 * |size| leaves no room for even the terminator.
 */
JS_FRIEND_API(int)
js_fgets(char *buf, int size, FILE *file)
{
    /* One slot is always reserved for the trailing '\0'. */
    const int capacity = size - 1;
    if (capacity < 0)
        return -1;

    int len = 0;
    bool sawCR = false;
    while (len < capacity) {
        int ch = fast_getc(file);
        if (ch == EOF)
            break;

        buf[len] = ch;
        if (ch == '\n') {
            /* Any \n ends the line; keep it, the '\0' still fits after it. */
            len++;
            break;
        }
        if (sawCR) {
            /*
             * The previous char was a \r and this one is not \n, so the line
             * ended at the \r. Hand this char back to the stream; its slot
             * in buf is overwritten by the terminator below.
             */
            ungetc(ch, file);
            break;
        }
        sawCR = (ch == '\r');
        len++;
    }

    buf[len] = '\0';
    return len;
}
#ifdef DEBUG
/*
 * Debug-only helper: return the spelling of |tt|'s enumerator as a C string.
 * TOK_LIMIT, and any value that has no case below, yields "<bad TokenKind>".
 */
const char *
TokenKindToString(TokenKind tt)
{
    /* Each case returns the enumerator's own name, produced by stringification. */
#define TOKEN_KIND_NAME_CASE(kind) case kind: return #kind;
    switch (tt) {
      TOKEN_KIND_NAME_CASE(TOK_ERROR)
      TOKEN_KIND_NAME_CASE(TOK_EOF)
      TOKEN_KIND_NAME_CASE(TOK_EOL)
      TOKEN_KIND_NAME_CASE(TOK_SEMI)
      TOKEN_KIND_NAME_CASE(TOK_COMMA)
      TOKEN_KIND_NAME_CASE(TOK_HOOK)
      TOKEN_KIND_NAME_CASE(TOK_COLON)
      TOKEN_KIND_NAME_CASE(TOK_OR)
      TOKEN_KIND_NAME_CASE(TOK_AND)
      TOKEN_KIND_NAME_CASE(TOK_BITOR)
      TOKEN_KIND_NAME_CASE(TOK_BITXOR)
      TOKEN_KIND_NAME_CASE(TOK_BITAND)
      TOKEN_KIND_NAME_CASE(TOK_PLUS)
      TOKEN_KIND_NAME_CASE(TOK_MINUS)
      TOKEN_KIND_NAME_CASE(TOK_STAR)
      TOKEN_KIND_NAME_CASE(TOK_DIV)
      TOKEN_KIND_NAME_CASE(TOK_MOD)
      TOKEN_KIND_NAME_CASE(TOK_INC)
      TOKEN_KIND_NAME_CASE(TOK_DEC)
      TOKEN_KIND_NAME_CASE(TOK_DOT)
      TOKEN_KIND_NAME_CASE(TOK_TRIPLEDOT)
      TOKEN_KIND_NAME_CASE(TOK_LB)
      TOKEN_KIND_NAME_CASE(TOK_RB)
      TOKEN_KIND_NAME_CASE(TOK_LC)
      TOKEN_KIND_NAME_CASE(TOK_RC)
      TOKEN_KIND_NAME_CASE(TOK_LP)
      TOKEN_KIND_NAME_CASE(TOK_RP)
      TOKEN_KIND_NAME_CASE(TOK_ARROW)
      TOKEN_KIND_NAME_CASE(TOK_NAME)
      TOKEN_KIND_NAME_CASE(TOK_NUMBER)
      TOKEN_KIND_NAME_CASE(TOK_STRING)
      TOKEN_KIND_NAME_CASE(TOK_REGEXP)
      TOKEN_KIND_NAME_CASE(TOK_TRUE)
      TOKEN_KIND_NAME_CASE(TOK_FALSE)
      TOKEN_KIND_NAME_CASE(TOK_NULL)
      TOKEN_KIND_NAME_CASE(TOK_THIS)
      TOKEN_KIND_NAME_CASE(TOK_FUNCTION)
      TOKEN_KIND_NAME_CASE(TOK_IF)
      TOKEN_KIND_NAME_CASE(TOK_ELSE)
      TOKEN_KIND_NAME_CASE(TOK_SWITCH)
      TOKEN_KIND_NAME_CASE(TOK_CASE)
      TOKEN_KIND_NAME_CASE(TOK_DEFAULT)
      TOKEN_KIND_NAME_CASE(TOK_WHILE)
      TOKEN_KIND_NAME_CASE(TOK_DO)
      TOKEN_KIND_NAME_CASE(TOK_FOR)
      TOKEN_KIND_NAME_CASE(TOK_BREAK)
      TOKEN_KIND_NAME_CASE(TOK_CONTINUE)
      TOKEN_KIND_NAME_CASE(TOK_IN)
      TOKEN_KIND_NAME_CASE(TOK_VAR)
      TOKEN_KIND_NAME_CASE(TOK_CONST)
      TOKEN_KIND_NAME_CASE(TOK_WITH)
      TOKEN_KIND_NAME_CASE(TOK_RETURN)
      TOKEN_KIND_NAME_CASE(TOK_NEW)
      TOKEN_KIND_NAME_CASE(TOK_DELETE)
      TOKEN_KIND_NAME_CASE(TOK_TRY)
      TOKEN_KIND_NAME_CASE(TOK_CATCH)
      TOKEN_KIND_NAME_CASE(TOK_FINALLY)
      TOKEN_KIND_NAME_CASE(TOK_THROW)
      TOKEN_KIND_NAME_CASE(TOK_INSTANCEOF)
      TOKEN_KIND_NAME_CASE(TOK_DEBUGGER)
      TOKEN_KIND_NAME_CASE(TOK_YIELD)
      TOKEN_KIND_NAME_CASE(TOK_LET)
      TOKEN_KIND_NAME_CASE(TOK_RESERVED)
      TOKEN_KIND_NAME_CASE(TOK_STRICT_RESERVED)
      TOKEN_KIND_NAME_CASE(TOK_STRICTEQ)
      TOKEN_KIND_NAME_CASE(TOK_EQ)
      TOKEN_KIND_NAME_CASE(TOK_STRICTNE)
      TOKEN_KIND_NAME_CASE(TOK_NE)
      TOKEN_KIND_NAME_CASE(TOK_TYPEOF)
      TOKEN_KIND_NAME_CASE(TOK_VOID)
      TOKEN_KIND_NAME_CASE(TOK_NOT)
      TOKEN_KIND_NAME_CASE(TOK_BITNOT)
      TOKEN_KIND_NAME_CASE(TOK_LT)
      TOKEN_KIND_NAME_CASE(TOK_LE)
      TOKEN_KIND_NAME_CASE(TOK_GT)
      TOKEN_KIND_NAME_CASE(TOK_GE)
      TOKEN_KIND_NAME_CASE(TOK_LSH)
      TOKEN_KIND_NAME_CASE(TOK_RSH)
      TOKEN_KIND_NAME_CASE(TOK_URSH)
      TOKEN_KIND_NAME_CASE(TOK_ASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_ADDASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_SUBASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_BITORASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_BITXORASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_BITANDASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_LSHASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_RSHASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_URSHASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_MULASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_DIVASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_MODASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_EXPORT)
      TOKEN_KIND_NAME_CASE(TOK_IMPORT)

      /* TOK_LIMIT has no name of its own; fall out to the "bad" string. */
      case TOK_LIMIT:
        break;
    }
#undef TOKEN_KIND_NAME_CASE

    return "<bad TokenKind>";
}
#endif