blob: 48fdec377ce3583f0ac4816146087458c677d4c8 [file] [log] [blame]
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
* vim: set ts=8 sts=4 et sw=4 tw=99:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef frontend_TokenStream_h
#define frontend_TokenStream_h
/*
* JS lexical scanner interface.
*/
#include "mozilla/DebugOnly.h"
#include "mozilla/PodOperations.h"
#include <stddef.h>
#include <stdio.h>
#include <stdarg.h>
#include "jscntxt.h"
#include "jsversion.h"
#include "jsopcode.h"
#include "jsprvtd.h"
#include "jspubtd.h"
#include "js/Vector.h"
namespace js {
namespace frontend {
/*
 * Every token kind the scanner can produce.  NOTE: enumerator order is
 * significant — several contiguous ranges below are range-tested by the
 * TokenKindIs*() predicates, and the binary-operator run must mirror
 * FOR_EACH_PARSE_NODE_KIND in ParseNode.h.  Do not reorder casually.
 */
enum TokenKind {
    TOK_ERROR = -1,                /* well-known as the only code < EOF */
    TOK_EOF,                       /* end of file */
    TOK_EOL,                       /* end of line; only returned by peekTokenSameLine() */
    TOK_SEMI,                      /* semicolon */
    TOK_COMMA,                     /* comma operator */
    TOK_HOOK, TOK_COLON,           /* conditional (?:) */
    TOK_INC, TOK_DEC,              /* increment/decrement (++ --) */
    TOK_DOT,                       /* member operator (.) */
    TOK_TRIPLEDOT,                 /* for rest arguments (...) */
    TOK_LB, TOK_RB,                /* left and right brackets */
    TOK_LC, TOK_RC,                /* left and right curlies (braces) */
    TOK_LP, TOK_RP,                /* left and right parentheses */
    TOK_NAME,                      /* identifier */
    TOK_NUMBER,                    /* numeric constant */
    TOK_STRING,                    /* string constant */
    TOK_REGEXP,                    /* RegExp constant */
    TOK_TRUE,                      /* true */
    TOK_FALSE,                     /* false */
    TOK_NULL,                      /* null */
    TOK_THIS,                      /* this */
    TOK_FUNCTION,                  /* function keyword */
    TOK_IF,                        /* if keyword */
    TOK_ELSE,                      /* else keyword */
    TOK_SWITCH,                    /* switch keyword */
    TOK_CASE,                      /* case keyword */
    TOK_DEFAULT,                   /* default keyword */
    TOK_WHILE,                     /* while keyword */
    TOK_DO,                        /* do keyword */
    TOK_FOR,                       /* for keyword */
    TOK_BREAK,                     /* break keyword */
    TOK_CONTINUE,                  /* continue keyword */
    TOK_VAR,                       /* var keyword */
    TOK_CONST,                     /* const keyword */
    TOK_WITH,                      /* with keyword */
    TOK_RETURN,                    /* return keyword */
    TOK_NEW,                       /* new keyword */
    TOK_DELETE,                    /* delete keyword */
    TOK_TRY,                       /* try keyword */
    TOK_CATCH,                     /* catch keyword */
    TOK_FINALLY,                   /* finally keyword */
    TOK_THROW,                     /* throw keyword */
    TOK_DEBUGGER,                  /* debugger keyword */
    TOK_YIELD,                     /* yield from generator function */
    TOK_LET,                       /* let keyword */
    TOK_EXPORT,                    /* export keyword */
    TOK_IMPORT,                    /* import keyword */
    TOK_RESERVED,                  /* reserved keywords */
    TOK_STRICT_RESERVED,           /* reserved keywords in strict mode */

    /*
     * The following token types occupy contiguous ranges to enable easy
     * range-testing.
     */

    /*
     * Binary operator tokens, TOK_OR thru TOK_MOD.  These must be in the same
     * order as F(OR) and friends in FOR_EACH_PARSE_NODE_KIND in ParseNode.h.
     */
    TOK_OR,                        /* logical or (||) */
    TOK_BINOP_FIRST = TOK_OR,
    TOK_AND,                       /* logical and (&&) */
    TOK_BITOR,                     /* bitwise-or (|) */
    TOK_BITXOR,                    /* bitwise-xor (^) */
    TOK_BITAND,                    /* bitwise-and (&) */

    /* Equality operation tokens, per TokenKindIsEquality */
    TOK_STRICTEQ,
    TOK_EQUALITY_START = TOK_STRICTEQ,
    TOK_EQ,
    TOK_STRICTNE,
    TOK_NE,
    TOK_EQUALITY_LAST = TOK_NE,

    /* Relational ops (< <= > >=), per TokenKindIsRelational */
    TOK_LT,
    TOK_RELOP_START = TOK_LT,
    TOK_LE,
    TOK_GT,
    TOK_GE,
    TOK_RELOP_LAST = TOK_GE,

    TOK_INSTANCEOF,                /* instanceof keyword */
    TOK_IN,                        /* in keyword */

    /* Shift ops (<< >> >>>), per TokenKindIsShift */
    TOK_LSH,
    TOK_SHIFTOP_START = TOK_LSH,
    TOK_RSH,
    TOK_URSH,
    TOK_SHIFTOP_LAST = TOK_URSH,

    TOK_PLUS,                      /* plus */
    TOK_MINUS,                     /* minus */
    TOK_STAR,                      /* multiply */
    TOK_DIV,                       /* divide */
    TOK_MOD,                       /* modulus */
    TOK_BINOP_LAST = TOK_MOD,

    /* Unary operation tokens */
    TOK_TYPEOF,
    TOK_VOID,
    TOK_NOT,
    TOK_BITNOT,

    TOK_ARROW,                     /* function arrow (=>) */

    /* Assignment ops (= += -= etc.), per TokenKindIsAssignment */
    TOK_ASSIGN,                    /* assignment ops (= += -= etc.) */
    TOK_ASSIGNMENT_START = TOK_ASSIGN,
    TOK_ADDASSIGN,
    TOK_SUBASSIGN,
    TOK_BITORASSIGN,
    TOK_BITXORASSIGN,
    TOK_BITANDASSIGN,
    TOK_LSHASSIGN,
    TOK_RSHASSIGN,
    TOK_URSHASSIGN,
    TOK_MULASSIGN,
    TOK_DIVASSIGN,
    TOK_MODASSIGN,
    TOK_ASSIGNMENT_LAST = TOK_MODASSIGN,

    TOK_LIMIT                      /* domain size */
};
// True iff |tt| lies in the contiguous binary-operator run of TokenKind
// (TOK_OR through TOK_MOD, bracketed by TOK_BINOP_FIRST/TOK_BINOP_LAST).
inline bool
TokenKindIsBinaryOp(TokenKind tt)
{
    return tt >= TOK_BINOP_FIRST && tt <= TOK_BINOP_LAST;
}
// True iff |tt| is one of the equality operators (=== == !== !=), i.e. lies
// in the TOK_EQUALITY_START..TOK_EQUALITY_LAST range.
inline bool
TokenKindIsEquality(TokenKind tt)
{
    return tt >= TOK_EQUALITY_START && tt <= TOK_EQUALITY_LAST;
}
// True iff |tt| is one of the relational operators (< <= > >=), i.e. lies
// in the TOK_RELOP_START..TOK_RELOP_LAST range.
inline bool
TokenKindIsRelational(TokenKind tt)
{
    return tt >= TOK_RELOP_START && tt <= TOK_RELOP_LAST;
}
// True iff |tt| is one of the shift operators (<< >> >>>), i.e. lies in the
// TOK_SHIFTOP_START..TOK_SHIFTOP_LAST range.
inline bool
TokenKindIsShift(TokenKind tt)
{
    return tt >= TOK_SHIFTOP_START && tt <= TOK_SHIFTOP_LAST;
}
// True iff |tt| is an assignment operator (= += -= etc.), i.e. lies in the
// TOK_ASSIGNMENT_START..TOK_ASSIGNMENT_LAST range.
inline bool
TokenKindIsAssignment(TokenKind tt)
{
    return tt >= TOK_ASSIGNMENT_START && tt <= TOK_ASSIGNMENT_LAST;
}
// True iff |tt| can start a variable declaration: var always, and let too
// when the engine is built with block scoping.
inline bool
TokenKindIsDecl(TokenKind tt)
{
#if JS_HAS_BLOCK_SCOPE
    return tt == TOK_LET || tt == TOK_VAR;
#else
    return tt == TOK_VAR;
#endif
}
// A half-open range of source-buffer offsets occupied by one token.
// Ordering comparisons look only at |begin|; equality looks at both fields.
struct TokenPos {
    uint32_t    begin;          /* offset of the token's first char */
    uint32_t    end;            /* offset of 1 past the token's last char */

    TokenPos() {}
    TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {}

    /* Return a TokenPos that covers left, right, and anything in between. */
    static TokenPos box(const TokenPos &left, const TokenPos &right) {
        JS_ASSERT(left.begin <= left.end);
        JS_ASSERT(left.end <= right.begin);
        JS_ASSERT(right.begin <= right.end);
        return TokenPos(left.begin, right.end);
    }

    bool operator==(const TokenPos& bpos) const {
        return begin == bpos.begin && end == bpos.end;
    }

    bool operator!=(const TokenPos& bpos) const {
        return !(*this == bpos);
    }

    bool operator <(const TokenPos& bpos) const {
        return begin < bpos.begin;
    }

    bool operator <=(const TokenPos& bpos) const {
        return begin <= bpos.begin;
    }

    bool operator >(const TokenPos& bpos) const {
        return bpos.begin < begin;
    }

    bool operator >=(const TokenPos& bpos) const {
        return bpos.begin <= begin;
    }

    /* True when |pos| lies entirely within this range. */
    bool encloses(const TokenPos& pos) const {
        return begin <= pos.begin && pos.end <= end;
    }
};
enum DecimalPoint { NoDecimal = false, HasDecimal = true };
// One scanned token: its kind, its source position, and a kind-specific
// payload stored in the union |u|.  The payload must only be read through
// the type-safe accessors below, which assert the token's kind matches.
struct Token {
    TokenKind           type;           /* char value or above enumerator */
    TokenPos            pos;            /* token position in file */
    union {
        struct {                        /* name or string literal */
            JSOp        op;             /* operator, for minimal parser */
            union {
              private:
                friend struct Token;
                PropertyName *name;     /* non-numeric atom */
                JSAtom       *atom;     /* potentially-numeric atom */
            } n;
        } s;
      private:
        friend struct Token;
        struct {                        /* pair for numeric literals */
            double       value;         /* floating point number */
            DecimalPoint decimalPoint;  /* literal contains . or exponent */
        } number;
        RegExpFlag      reflags;        /* regexp flags, use tokenbuf to access
                                           regexp chars */
    } u;

    /* Mutators */

    /*
     * FIXME: Init type early enough such that all mutators can assert
     * type-safety. See bug 697000.
     */

    // Store an identifier payload; |op| must be JSOP_NAME.
    void setName(JSOp op, PropertyName *name) {
        JS_ASSERT(op == JSOP_NAME);
        JS_ASSERT(!IsPoisonedPtr(name));
        u.s.op = op;
        u.s.n.name = name;
    }

    // Store a string-literal payload; |op| must be JSOP_STRING.
    void setAtom(JSOp op, JSAtom *atom) {
        JS_ASSERT(op == JSOP_STRING);
        JS_ASSERT(!IsPoisonedPtr(atom));
        u.s.op = op;
        u.s.n.atom = atom;
    }

    // Store regexp flags; the regexp source chars live in the tokenbuf.
    void setRegExpFlags(js::RegExpFlag flags) {
        JS_ASSERT((flags & AllFlags) == flags);
        u.reflags = flags;
    }

    // Store a numeric literal's value and decimal-point info.
    void setNumber(double n, DecimalPoint decimalPoint) {
        u.number.value = n;
        u.number.decimalPoint = decimalPoint;
    }

    /* Type-safe accessors */

    PropertyName *name() const {
        JS_ASSERT(type == TOK_NAME);
        return u.s.n.name->asPropertyName(); /* poor-man's type verification */
    }

    JSAtom *atom() const {
        JS_ASSERT(type == TOK_STRING);
        return u.s.n.atom;
    }

    js::RegExpFlag regExpFlags() const {
        JS_ASSERT(type == TOK_REGEXP);
        JS_ASSERT((u.reflags & AllFlags) == u.reflags);
        return u.reflags;
    }

    double number() const {
        JS_ASSERT(type == TOK_NUMBER);
        return u.number.value;
    }

    DecimalPoint decimalPoint() const {
        JS_ASSERT(type == TOK_NUMBER);
        return u.number.decimalPoint;
    }
};
#define t_op u.s.op
// Bit flags held in TokenStream::flags; toggled via setFlag() and the
// lexically-scoped Flagger helper.
enum TokenStreamFlags
{
    TSF_EOF = 0x02,             /* hit end of file */
    TSF_EOL = 0x04,             /* an EOL was hit in whitespace or a multi-line comment */
    TSF_OPERAND = 0x08,         /* looking for operand, not operator */
    TSF_UNEXPECTED_EOF = 0x10,  /* unexpected end of input, i.e. TOK_EOF not at top-level. */
    TSF_KEYWORD_IS_NAME = 0x20, /* Ignore keywords and return TOK_NAME instead to the parser. */
    TSF_DIRTYLINE = 0x40,       /* non-whitespace since start of line */
    TSF_OCTAL_CHAR = 0x80,      /* observed an octal character escape */
    TSF_HAD_ERROR = 0x100,      /* returned TOK_ERROR from getToken */

    /*
     * To handle the hard case of contiguous HTML comments, we want to clear the
     * TSF_DIRTYLINE flag at the end of each such comment.  But we'd rather not
     * scan for --> within every //-style comment unless we have to.  So we set
     * TSF_IN_HTML_COMMENT when a <!-- is scanned as an HTML begin-comment, and
     * clear it (and TSF_DIRTYLINE) when we scan --> either on a clean line, or
     * only if (ts->flags & TSF_IN_HTML_COMMENT), in a //-style comment.
     *
     * This still works as before given a malformed comment hiding hack such as:
     *
     *    <script>
     *      <!-- comment hiding hack #1
     *      code goes here
     *      // --> oops, markup for script-unaware browsers goes here!
     *    </script>
     *
     * It does not cope with malformed comment hiding hacks where --> is hidden
     * by C-style comments, or on a dirty line.  Such cases are already broken.
     */
    TSF_IN_HTML_COMMENT = 0x200
};
// Owns the state for one pending compile error: the context it was raised
// on, the zero-initialized JSErrorReport to fill in, and the formatted
// message (freed by the destructor, defined elsewhere).  throwError()
// delivers the report.
struct CompileError {
    JSContext *cx;
    JSErrorReport report;
    char *message;
    ErrorArgumentsType argumentsType;
    CompileError(JSContext *cx)
      : cx(cx), message(NULL), argumentsType(ArgumentsAreUnicode)
    {
        mozilla::PodZero(&report);
    }
    ~CompileError();
    void throwError();
};
// Reads the context-wide strict-mode option (JSOPTION_STRICT_MODE) off |cx|.
inline bool
StrictModeFromContext(JSContext *cx)
{
    return cx->hasOption(JSOPTION_STRICT_MODE);
}
// Ideally, tokenizing would be entirely independent of context. But the
// strict mode flag, which is in SharedContext, affects tokenizing, and
// TokenStream needs to see it.
//
// This class is a tiny back-channel from TokenStream to the strict mode flag
// that avoids exposing the rest of SharedContext to TokenStream.
//
// Abstract interface through which TokenStream queries the current strict
// mode flag without seeing the rest of SharedContext (see comment above).
class StrictModeGetter {
  public:
    // Polymorphic interface: give it a virtual destructor so that deleting
    // an implementation through a StrictModeGetter* runs the derived
    // destructor rather than invoking undefined behavior.
    virtual ~StrictModeGetter() {}
    // Returns whether the code currently being tokenized is in strict mode.
    virtual bool strictMode() = 0;
};
// TokenStream is the lexical scanner for Javascript source text.
//
// It takes a buffer of jschars and linearly scans it into |Token|s.
// Internally the class uses a four element circular buffer |tokens| of
// |Token|s. As an index for |tokens|, the member |cursor| points to the
// current token.
// Calls to getToken() increase |cursor| by one and return the new current
// token. If a TokenStream was just created, the current token is initialized
// with random data (i.e. not initialized). It is therefore important that
// either of the first four member functions listed below is called first.
// The circular buffer lets us go back up to two tokens from the last
// scanned token. Internally, the relative number of backward steps that were
// taken (via ungetToken()) after the last token was scanned is stored in
// |lookahead|.
//
// The following table lists in which situations it is safe to call each listed
// function. No checks are made by the functions in non-debug builds.
//
// Function Name | Precondition; changes to |lookahead|
// ------------------+---------------------------------------------------------
// getToken | none; if |lookahead > 0| then |lookahead--|
// peekToken | none; none
// peekTokenSameLine | none; none
// matchToken | none; if |lookahead > 0| and the match succeeds then
// | |lookahead--|
// consumeKnownToken | none; if |lookahead > 0| then |lookahead--|
// ungetToken | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++|
//
// The behavior of the token scanning process (see getTokenInternal()) can be
// modified by calling one of the first four above listed member functions with
// an optional argument of type TokenStreamFlags. The two flags that do
// influence the scanning process are TSF_OPERAND and TSF_KEYWORD_IS_NAME.
// However, they will be ignored unless |lookahead == 0| holds.
// Due to constraints of the grammar, this turns out not to be a problem in
// practice. See the mozilla.dev.tech.js-engine.internals thread entitled 'Bug
// in the scanner?' for more details (https://groups.google.com/forum/?
// fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E).
//
// The methods seek() and tell() allow rescanning from a previously visited
// location of the buffer.
class MOZ_STACK_CLASS TokenStream
{
    /* Unicode separators that are treated as line terminators, in addition to \n, \r */
    enum {
        LINE_SEPARATOR = 0x2028,
        PARA_SEPARATOR = 0x2029
    };

    static const size_t ntokens = 4;    /* 1 current + 2 lookahead, rounded
                                           to power of 2 to avoid divmod by 3 */
    static const unsigned maxLookahead = 2;
    static const unsigned ntokensMask = ntokens - 1;

  public:
    typedef Vector<jschar, 32> CharBuffer;

    TokenStream(JSContext *cx, const CompileOptions &options,
                const jschar *base, size_t length, StrictModeGetter *smg,
                AutoKeepAtoms& keepAtoms);

    ~TokenStream();

    /* Accessors. */
    JSContext *getContext() const { return cx; }
    bool onCurrentLine(const TokenPos &pos) const { return srcCoords.isOnThisLine(pos.end, lineno); }
    const Token &currentToken() const { return tokens[cursor]; }
    bool isCurrentTokenType(TokenKind type) const {
        return currentToken().type == type;
    }
    // True when the current token is either of the two given kinds.
    bool isCurrentTokenType(TokenKind type1, TokenKind type2) const {
        TokenKind type = currentToken().type;
        return type == type1 || type == type2;
    }
    const CharBuffer &getTokenbuf() const { return tokenbuf; }
    const char *getFilename() const { return filename; }
    unsigned getLineno() const { return lineno; }
    // Column of the char most recently consumed, relative to the current
    // line start (hence the -1).
    unsigned getColumn() const { return userbuf.addressOfNextRawChar() - linebase - 1; }
    JSVersion versionNumber() const { return VersionNumber(version); }
    JSVersion versionWithFlags() const { return version; }
    bool hadError() const { return !!(flags & TSF_HAD_ERROR); }

    bool isCurrentTokenEquality() const {
        return TokenKindIsEquality(currentToken().type);
    }
    bool isCurrentTokenRelational() const {
        return TokenKindIsRelational(currentToken().type);
    }
    bool isCurrentTokenShift() const {
        return TokenKindIsShift(currentToken().type);
    }
    bool isCurrentTokenAssignment() const {
        return TokenKindIsAssignment(currentToken().type);
    }

    /* Flag methods. */
    void setUnexpectedEOF(bool enabled = true) { setFlag(enabled, TSF_UNEXPECTED_EOF); }
    bool isUnexpectedEOF() const { return !!(flags & TSF_UNEXPECTED_EOF); }
    bool isEOF() const { return !!(flags & TSF_EOF); }
    bool sawOctalEscape() const { return !!(flags & TSF_OCTAL_CHAR); }

    // TokenStream-specific error reporters.
    bool reportError(unsigned errorNumber, ...);
    bool reportWarning(unsigned errorNumber, ...);

    // General-purpose error reporters.  You should avoid calling these
    // directly, and instead use the more succinct alternatives (e.g.
    // reportError()) in TokenStream, Parser, and BytecodeEmitter.
    bool reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
                                    va_list args);
    bool reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
                                       va_list args);
    bool reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber,
                                          va_list args);

    // asm.js reporter
    void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...);

  private:
    // These are private because they should only be called by the tokenizer
    // while tokenizing not by, for example, BytecodeEmitter.
    bool reportStrictModeError(unsigned errorNumber, ...);
    bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); }

    void onError();
    static JSAtom *atomize(JSContext *cx, CharBuffer &cb);
    bool putIdentInTokenbuf(const jschar *identStart);

    /*
     * Enables flags in the associated tokenstream for the object lifetime.
     * Useful for lexically-scoped flag toggles.
     */
    class Flagger {
        TokenStream * const parent;
        unsigned flags;
      public:
        Flagger(TokenStream *parent, unsigned withFlags) : parent(parent), flags(withFlags) {
            parent->flags |= flags;
        }

        // RAII-style: the flags set in the constructor are cleared on scope
        // exit.  NOTE: this unconditionally clears |flags|, so nesting two
        // Flaggers with overlapping flag sets would clear them early.
        ~Flagger() { parent->flags &= ~flags; }
    };
    friend class Flagger;

    void setFlag(bool enabled, TokenStreamFlags flag) {
        if (enabled)
            flags |= flag;
        else
            flags &= ~flag;
    }

  public:
    /*
     * Get the next token from the stream, make it the current token, and
     * return its kind.
     */
    TokenKind getToken() {
        /* Check for a pushed-back token resulting from mismatching lookahead. */
        if (lookahead != 0) {
            lookahead--;
            cursor = (cursor + 1) & ntokensMask;
            TokenKind tt = currentToken().type;
            JS_ASSERT(tt != TOK_EOL);
            return tt;
        }

        return getTokenInternal();
    }

    /* Similar, but also sets flags. */
    TokenKind getToken(unsigned withFlags) {
        Flagger flagger(this, withFlags);
        return getToken();
    }

    /*
     * Push the last scanned token back into the stream.
     */
    void ungetToken() {
        JS_ASSERT(lookahead < maxLookahead);
        lookahead++;
        cursor = (cursor - 1) & ntokensMask;
    }

    // Return the next token's kind without making it current.
    TokenKind peekToken() {
        if (lookahead != 0)
            return tokens[(cursor + 1) & ntokensMask].type;
        TokenKind tt = getTokenInternal();
        ungetToken();
        return tt;
    }

    TokenKind peekToken(unsigned withFlags) {
        Flagger flagger(this, withFlags);
        return peekToken();
    }

    // Like peekToken(), but returns TOK_EOL if an end-of-line would
    // intervene before the next token.
    TokenKind peekTokenSameLine(unsigned withFlags = 0) {
        if (!onCurrentLine(currentToken().pos))
            return TOK_EOL;

        if (lookahead != 0)
            return tokens[(cursor + 1) & ntokensMask].type;

        /*
         * This is the only place TOK_EOL is produced.  No token with TOK_EOL
         * is created, just a TOK_EOL TokenKind is returned.
         */
        flags &= ~TSF_EOL;
        TokenKind tt = getToken(withFlags);
        if (flags & TSF_EOL) {
            tt = TOK_EOL;
            flags &= ~TSF_EOL;
        }
        ungetToken();
        return tt;
    }

    /*
     * Get the next token from the stream if its kind is |tt|.
     */
    bool matchToken(TokenKind tt) {
        if (getToken() == tt)
            return true;
        ungetToken();
        return false;
    }

    bool matchToken(TokenKind tt, unsigned withFlags) {
        Flagger flagger(this, withFlags);
        return matchToken(tt);
    }

    // Consume the next token, asserting (in debug builds) it has kind |tt|.
    void consumeKnownToken(TokenKind tt) {
        JS_ALWAYS_TRUE(matchToken(tt));
    }

    // A saved scan position, filled in by tell() and restored by seek().
    class MOZ_STACK_CLASS Position {
      public:
        /*
         * The Token fields may contain pointers to atoms, so for correct
         * rooting we must ensure collection of atoms is disabled while objects
         * of this class are live.  Do this by requiring a dummy AutoKeepAtoms
         * reference in the constructor.
         *
         * This class is explicitly ignored by the analysis, so don't add any
         * more pointers to GC things here!
         */
        Position(AutoKeepAtoms&) { }
      private:
        Position(const Position&) MOZ_DELETE;
        friend class TokenStream;
        const jschar *buf;
        unsigned flags;
        unsigned lineno;
        const jschar *linebase;
        const jschar *prevLinebase;
        Token currentToken;
        unsigned lookahead;
        Token lookaheadTokens[maxLookahead];
    };

    void advance(size_t position);
    void tell(Position *);
    void seek(const Position &pos);
    void seek(const Position &pos, const TokenStream &other);
    void positionAfterLastFunctionKeyword(Position &pos);

    // Convert a saved Position back into a buffer offset.
    size_t positionToOffset(const Position &pos) const {
        return pos.buf - userbuf.base();
    }

    bool hasSourceMap() const {
        return sourceMap != NULL;
    }

    /*
     * Give up responsibility for managing the sourceMap filename's memory.
     */
    jschar *releaseSourceMap() {
        JS_ASSERT(hasSourceMap());
        jschar *sm = sourceMap;
        sourceMap = NULL;
        return sm;
    }

    /*
     * If the name at s[0:length] is not a keyword in this version, return
     * true with *ttp and *topp unchanged.
     *
     * If it is a reserved word in this version and strictness mode, and thus
     * can't be present in correct code, report a SyntaxError and return false.
     *
     * If it is a keyword, like "if", the behavior depends on ttp/topp.  If ttp
     * and topp are null, report a SyntaxError ("if is a reserved identifier")
     * and return false.  If ttp and topp are non-null, return true with the
     * keyword's TokenKind in *ttp and its JSOp in *topp.
     *
     * ttp and topp must be either both null or both non-null.
     */
    bool checkForKeyword(const jschar *s, size_t length, TokenKind *ttp, JSOp *topp);

    // This class maps a userbuf offset (which is 0-indexed) to a line number
    // (which is 1-indexed) and a column index (which is 0-indexed).
    class SourceCoords
    {
        // For a given buffer holding source code, |lineStartOffsets_| has one
        // element per line of source code, plus one sentinel element.  Each
        // non-sentinel element holds the buffer offset for the start of the
        // corresponding line of source code.  For this example script:
        //
        // 1  // xyz            [line starts at offset 0]
        // 2  var x;            [line starts at offset 7]
        // 3                    [line starts at offset 14]
        // 4  var y;            [line starts at offset 15]
        //
        // |lineStartOffsets_| is:
        //
        //   [0, 7, 14, 15, MAX_PTR]
        //
        // To convert a "line number" to a "line index" (i.e. an index into
        // |lineStartOffsets_|), subtract |initialLineNum_|.  E.g. line 3's
        // line index is (3 - initialLineNum_), which is 2.  Therefore
        // lineStartOffsets_[2] holds the buffer offset for the start of line 3,
        // which is 14.  (Note that |initialLineNum_| is often 1, but not
        // always.)
        //
        // The first element is always 0, and the last element is always the
        // MAX_PTR sentinel.
        //
        // offset-to-line/column lookups are O(log n) in the worst case (binary
        // search), but in practice they're heavily clustered and we do better
        // than that by using the previous lookup's result (lastLineIndex_) as
        // a starting point.
        //
        // Checking if an offset lies within a particular line number
        // (isOnThisLine()) is O(1).
        //
        Vector<uint32_t, 128> lineStartOffsets_;
        uint32_t initialLineNum_;

        // This is mutable because it's modified on every search, but that fact
        // isn't visible outside this class.
        mutable uint32_t lastLineIndex_;

        uint32_t lineIndexOf(uint32_t offset) const;

        static const uint32_t MAX_PTR = UINT32_MAX;

        uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; }
        uint32_t lineNumToIndex(uint32_t lineNum) const { return lineNum - initialLineNum_; }

      public:
        SourceCoords(JSContext *cx, uint32_t ln);

        void add(uint32_t lineNum, uint32_t lineStartOffset);
        void fill(const SourceCoords &other);

        bool isOnThisLine(uint32_t offset, uint32_t lineNum) const {
            uint32_t lineIndex = lineNumToIndex(lineNum);
            JS_ASSERT(lineIndex + 1 < lineStartOffsets_.length());  // +1 due to sentinel
            return lineStartOffsets_[lineIndex] <= offset &&
                   offset < lineStartOffsets_[lineIndex + 1];
        }

        uint32_t lineNum(uint32_t offset) const;
        uint32_t columnIndex(uint32_t offset) const;
        void lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum, uint32_t *columnIndex) const;
    };

    SourceCoords srcCoords;

  private:
    /*
     * This is the low-level interface to the JS source code buffer.  It just
     * gets raw chars, basically.  TokenStreams functions are layered on top
     * and do some extra stuff like converting all EOL sequences to '\n',
     * tracking the line number, and setting the TSF_EOF flag.  (The "raw" in
     * "raw chars" refers to the lack of EOL sequence normalization.)
     */
    class TokenBuf {
      public:
        TokenBuf(JSContext *cx, const jschar *buf, size_t length)
          : base_(buf), limit_(buf + length), ptr(buf),
            skipBase(cx, &base_), skipLimit(cx, &limit_), skipPtr(cx, &ptr)
        { }

        bool hasRawChars() const {
            return ptr < limit_;
        }

        bool atStart() const {
            return ptr == base_;
        }

        const jschar *base() const {
            return base_;
        }

        const jschar *limit() const {
            return limit_;
        }

        jschar getRawChar() {
            return *ptr++;      /* this will NULL-crash if poisoned */
        }

        jschar peekRawChar() const {
            return *ptr;        /* this will NULL-crash if poisoned */
        }

        bool matchRawChar(jschar c) {
            if (*ptr == c) {    /* this will NULL-crash if poisoned */
                ptr++;
                return true;
            }
            return false;
        }

        bool matchRawCharBackwards(jschar c) {
            JS_ASSERT(ptr);     /* make sure haven't been poisoned */
            if (*(ptr - 1) == c) {
                ptr--;
                return true;
            }
            return false;
        }

        void ungetRawChar() {
            JS_ASSERT(ptr);     /* make sure haven't been poisoned */
            ptr--;
        }

        const jschar *addressOfNextRawChar(bool allowPoisoned = false) const {
            JS_ASSERT_IF(!allowPoisoned, ptr);  /* make sure haven't been poisoned */
            return ptr;
        }

        /* Use this with caution! */
        void setAddressOfNextRawChar(const jschar *a, bool allowPoisoned = false) {
            JS_ASSERT_IF(!allowPoisoned, a);
            ptr = a;
        }

#ifdef DEBUG
        /* Poison the TokenBuf so it cannot be accessed again. */
        void poison() {
            ptr = NULL;
        }
#endif

        static bool isRawEOLChar(int32_t c) {
            return (c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR);
        }

        // Finds the next EOL, but stops once 'max' jschars have been scanned
        // (*including* the starting jschar).
        const jschar *findEOLMax(const jschar *p, size_t max);

      private:
        const jschar *base_;            /* base of buffer */
        const jschar *limit_;           /* limit for quick bounds check */
        const jschar *ptr;              /* next char to get */

        // We are not yet moving strings
        SkipRoot skipBase, skipLimit, skipPtr;
    };

    TokenKind getTokenInternal();   /* doesn't check for pushback or error flag. */

    int32_t getChar();
    int32_t getCharIgnoreEOL();
    void ungetChar(int32_t c);
    void ungetCharIgnoreEOL(int32_t c);
    Token *newToken(ptrdiff_t adjust);
    bool peekUnicodeEscape(int32_t *c);
    bool matchUnicodeEscapeIdStart(int32_t *c);
    bool matchUnicodeEscapeIdent(int32_t *c);
    bool peekChars(int n, jschar *cp);
    bool getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated);

    // |expect| cannot be an EOL char.
    bool matchChar(int32_t expect) {
        MOZ_ASSERT(!TokenBuf::isRawEOLChar(expect));
        return JS_LIKELY(userbuf.hasRawChars()) &&
               userbuf.matchRawChar(expect);
    }

    // Consume one char, asserting (in debug builds) it equals |expect|.
    void consumeKnownChar(int32_t expect) {
        mozilla::DebugOnly<int32_t> c = getChar();
        JS_ASSERT(c == expect);
    }

    // Look at the next char without consuming it (via get + unget).
    int32_t peekChar() {
        int32_t c = getChar();
        ungetChar(c);
        return c;
    }

    void skipChars(int n) {
        while (--n >= 0)
            getChar();
    }

    void updateLineInfoForEOL();
    void updateFlagsForEOL();

    Token               tokens[ntokens];/* circular token buffer */
    unsigned            cursor;         /* index of last parsed token */
    unsigned            lookahead;      /* count of lookahead tokens */
    unsigned            lineno;         /* current line number */
    unsigned            flags;          /* flags -- see above */
    const jschar        *linebase;      /* start of current line;  points into userbuf */
    const jschar        *prevLinebase;  /* start of previous line;  NULL if on the first line */
    TokenBuf            userbuf;        /* user input buffer */
    const char          *filename;      /* input filename or null */
    jschar              *sourceMap;     /* source map's filename or null */
    void                *listenerTSData;/* listener data for this TokenStream */
    CharBuffer          tokenbuf;       /* current token string buffer */
    int8_t              oneCharTokens[128];  /* table of one-char tokens */
    bool                maybeEOL[256];       /* probabilistic EOL lookup table */
    bool                maybeStrSpecial[256];/* speeds up string scanning */
    JSVersion           version;        /* (i.e. to identify keywords) */
    JSContext           *const cx;
    JSPrincipals        *const originPrincipals;
    StrictModeGetter    *strictModeGetter;  /* used to test for strict mode */
    Position            lastFunctionKeyword; /* used as a starting point for reparsing strict functions */

    /*
     * The tokens array stores pointers to JSAtoms.  These are rooted by the
     * atoms table using AutoKeepAtoms in the Parser.  This SkipRoot tells the
     * exact rooting analysis to ignore the atoms in the tokens array.
     */
    SkipRoot            tokenSkip;

    // Bug 846011
    SkipRoot            linebaseSkip;
    SkipRoot            prevLinebaseSkip;
};
/*
* Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error
* message have const jschar* type, not const char*.
*/
#define JSREPORT_UC 0x100
} /* namespace frontend */
} /* namespace js */
extern JS_FRIEND_API(int)
js_fgets(char *buf, int size, FILE *file);
#ifdef DEBUG
extern const char *
TokenKindToString(js::frontend::TokenKind tt);
#endif
#endif /* frontend_TokenStream_h */