 // src/third_party/mozjs/js/src/frontend/TokenStream.h - cobalt - Git at Google

 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  * vim: set ts=8 sts=4 et sw=4 tw=99:
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

 #ifndef frontend_TokenStream_h
 #define frontend_TokenStream_h

 /*
  * JS lexical scanner interface.
  */

 #include "mozilla/DebugOnly.h"
 #include "mozilla/PodOperations.h"

 #include <stddef.h>
 #include <stdio.h>
 #include <stdarg.h>
 #include "jscntxt.h"
 #include "jsversion.h"
 #include "jsopcode.h"
 #include "jsprvtd.h"
 #include "jspubtd.h"

 #include "js/Vector.h"

 namespace js {
 namespace frontend {

 /*
  * Kinds of token produced by the scanner.  TOK_ERROR is the only negative
  * enumerator and TOK_LIMIT is one past the last real kind.
  *
  * Enumerators written as TOK_X = TOK_Y (the *_FIRST/*_START/*_LAST names) are
  * aliases marking the ends of a contiguous range.  They do not introduce new
  * values: the enumerator that follows an alias continues from the aliased
  * value plus one, so inserting or reordering kinds inside a range silently
  * changes what the TokenKindIs*() predicates accept.
  */
 enum TokenKind {
     TOK_ERROR = -1,                /* well-known as the only code < EOF */
     TOK_EOF,                       /* end of file */
     TOK_EOL,                       /* end of line; only returned by peekTokenSameLine() */
     TOK_SEMI,                      /* semicolon */
     TOK_COMMA,                     /* comma operator */
     TOK_HOOK, TOK_COLON,           /* conditional (?:) */
     TOK_INC, TOK_DEC,              /* increment/decrement (++ --) */
     TOK_DOT,                       /* member operator (.) */
     TOK_TRIPLEDOT,                 /* for rest arguments (...) */
     TOK_LB, TOK_RB,                /* left and right brackets */
     TOK_LC, TOK_RC,                /* left and right curlies (braces) */
     TOK_LP, TOK_RP,                /* left and right parentheses */
     TOK_NAME,                      /* identifier */
     TOK_NUMBER,                    /* numeric constant */
     TOK_STRING,                    /* string constant */
     TOK_REGEXP,                    /* RegExp constant */
     TOK_TRUE,                      /* true */
     TOK_FALSE,                     /* false */
     TOK_NULL,                      /* null */
     TOK_THIS,                      /* this */
     TOK_FUNCTION,                  /* function keyword */
     TOK_IF,                        /* if keyword */
     TOK_ELSE,                      /* else keyword */
     TOK_SWITCH,                    /* switch keyword */
     TOK_CASE,                      /* case keyword */
     TOK_DEFAULT,                   /* default keyword */
     TOK_WHILE,                     /* while keyword */
     TOK_DO,                        /* do keyword */
     TOK_FOR,                       /* for keyword */
     TOK_BREAK,                     /* break keyword */
     TOK_CONTINUE,                  /* continue keyword */
     TOK_VAR,                       /* var keyword */
     TOK_CONST,                     /* const keyword */
     TOK_WITH,                      /* with keyword */
     TOK_RETURN,                    /* return keyword */
     TOK_NEW,                       /* new keyword */
     TOK_DELETE,                    /* delete keyword */
     TOK_TRY,                       /* try keyword */
     TOK_CATCH,                     /* catch keyword */
     TOK_FINALLY,                   /* finally keyword */
     TOK_THROW,                     /* throw keyword */
     TOK_DEBUGGER,                  /* debugger keyword */
     TOK_YIELD,                     /* yield from generator function */
     TOK_LET,                       /* let keyword */
     TOK_EXPORT,                    /* export keyword */
     TOK_IMPORT,                    /* import keyword */
     TOK_RESERVED,                  /* reserved keywords */
     TOK_STRICT_RESERVED,           /* reserved keywords in strict mode */

     /*
      * The following token types occupy contiguous ranges to enable easy
      * range-testing.
      */

     /*
      * Binary operator tokens, TOK_OR thru TOK_MOD. These must be in the same
      * order as F(OR) and friends in FOR_EACH_PARSE_NODE_KIND in ParseNode.h.
      * The equality, relational and shift sub-ranges below, as well as
      * TOK_INSTANCEOF and TOK_IN, all sit inside this range.
      */
     TOK_OR,                        /* logical or (||) */
     TOK_BINOP_FIRST = TOK_OR,
     TOK_AND,                       /* logical and (&&) */
     TOK_BITOR,                     /* bitwise-or (|) */
     TOK_BITXOR,                    /* bitwise-xor (^) */
     TOK_BITAND,                    /* bitwise-and (&) */

     /* Equality operation tokens, per TokenKindIsEquality */
     TOK_STRICTEQ,
     TOK_EQUALITY_START = TOK_STRICTEQ,
     TOK_EQ,
     TOK_STRICTNE,
     TOK_NE,
     TOK_EQUALITY_LAST = TOK_NE,

     /* Relational ops (< <= > >=), per TokenKindIsRelational */
     TOK_LT,
     TOK_RELOP_START = TOK_LT,
     TOK_LE,
     TOK_GT,
     TOK_GE,
     TOK_RELOP_LAST = TOK_GE,

     TOK_INSTANCEOF,                /* instanceof keyword */
     TOK_IN,                        /* in keyword */

     /* Shift ops (<< >> >>>), per TokenKindIsShift */
     TOK_LSH,
     TOK_SHIFTOP_START = TOK_LSH,
     TOK_RSH,
     TOK_URSH,
     TOK_SHIFTOP_LAST = TOK_URSH,

     TOK_PLUS,                      /* plus */
     TOK_MINUS,                     /* minus */
     TOK_STAR,                      /* multiply */
     TOK_DIV,                       /* divide */
     TOK_MOD,                       /* modulus */
     TOK_BINOP_LAST = TOK_MOD,

     /* Unary operation tokens */
     TOK_TYPEOF,
     TOK_VOID,
     TOK_NOT,
     TOK_BITNOT,

     TOK_ARROW,                     /* function arrow (=>) */

     /* Assignment ops (= += -= etc.), per TokenKindIsAssignment */
     TOK_ASSIGN,                    /* first kind in the assignment range */
     TOK_ASSIGNMENT_START = TOK_ASSIGN,
     TOK_ADDASSIGN,
     TOK_SUBASSIGN,
     TOK_BITORASSIGN,
     TOK_BITXORASSIGN,
     TOK_BITANDASSIGN,
     TOK_LSHASSIGN,
     TOK_RSHASSIGN,
     TOK_URSHASSIGN,
     TOK_MULASSIGN,
     TOK_DIVASSIGN,
     TOK_MODASSIGN,
     TOK_ASSIGNMENT_LAST = TOK_MODASSIGN,

     TOK_LIMIT                      /* domain size */
 };

 /* True iff |tt| lies in the contiguous range [TOK_BINOP_FIRST, TOK_BINOP_LAST]. */
 inline bool
 TokenKindIsBinaryOp(TokenKind tt)
 {
     if (tt < TOK_BINOP_FIRST)
         return false;
     return tt <= TOK_BINOP_LAST;
 }

 /* True iff |tt| lies in the contiguous range [TOK_EQUALITY_START, TOK_EQUALITY_LAST]. */
 inline bool
 TokenKindIsEquality(TokenKind tt)
 {
     const bool belowRange = tt < TOK_EQUALITY_START;
     const bool aboveRange = tt > TOK_EQUALITY_LAST;
     return !belowRange && !aboveRange;
 }

 /* True iff |tt| lies in the contiguous range [TOK_RELOP_START, TOK_RELOP_LAST]. */
 inline bool
 TokenKindIsRelational(TokenKind tt)
 {
     return !(tt < TOK_RELOP_START || tt > TOK_RELOP_LAST);
 }

 /* True iff |tt| lies in the contiguous range [TOK_SHIFTOP_START, TOK_SHIFTOP_LAST]. */
 inline bool
 TokenKindIsShift(TokenKind tt)
 {
     if (tt > TOK_SHIFTOP_LAST)
         return false;
     return tt >= TOK_SHIFTOP_START;
 }

 /* True iff |tt| lies in the contiguous range [TOK_ASSIGNMENT_START, TOK_ASSIGNMENT_LAST]. */
 inline bool
 TokenKindIsAssignment(TokenKind tt)
 {
     const bool atOrAfterStart = tt >= TOK_ASSIGNMENT_START;
     return atOrAfterStart && tt <= TOK_ASSIGNMENT_LAST;
 }

 /*
  * True iff |tt| is TOK_VAR, or TOK_LET when the build enables
  * JS_HAS_BLOCK_SCOPE.
  */
 inline bool
 TokenKindIsDecl(TokenKind tt)
 {
     if (tt == TOK_VAR)
         return true;
 #if JS_HAS_BLOCK_SCOPE
     if (tt == TOK_LET)
         return true;
 #endif
     return false;
 }

 /*
  * Half-open range [begin, end) of offsets occupied by a token.
  *
  * The ordering operators look at |begin| only, whereas == and != compare both
  * fields, so "neither less nor greater" does not imply equality.
  */
 struct TokenPos {
     uint32_t          begin;          /* offset of the token's first char */
     uint32_t          end;            /* offset of 1 past the token's last char */

     /* Leaves both offsets uninitialized. */
     TokenPos() {}

     TokenPos(uint32_t begin, uint32_t end)
       : begin(begin),
         end(end)
     {}

     /*
      * Build the smallest TokenPos spanning |left|, |right| and the gap between
      * them.  Both inputs must be well-formed and |left| must end no later than
      * |right| starts (asserted).
      */
     static TokenPos box(const TokenPos &left, const TokenPos &right) {
         JS_ASSERT(left.begin <= left.end);
         JS_ASSERT(left.end <= right.begin);
         JS_ASSERT(right.begin <= right.end);
         TokenPos span(left.begin, right.end);
         return span;
     }

     /* Exact comparison: both offsets must agree. */
     bool operator==(const TokenPos& bpos) const {
         if (begin != bpos.begin)
             return false;
         return end == bpos.end;
     }

     bool operator!=(const TokenPos& bpos) const {
         return !(*this == bpos);
     }

     /* Ordering by start offset only. */
     bool operator <(const TokenPos& bpos) const {
         return bpos.begin > begin;
     }

     bool operator <=(const TokenPos& bpos) const {
         return bpos.begin >= begin;
     }

     bool operator >(const TokenPos& bpos) const {
         return begin > bpos.begin;
     }

     bool operator >=(const TokenPos& bpos) const {
         return begin >= bpos.begin;
     }

     /* True iff |pos| lies entirely within this range (shared endpoints allowed). */
     bool encloses(const TokenPos& pos) const {
         if (pos.begin < begin)
             return false;
         return pos.end <= end;
     }
 };

 /*
  * Whether a numeric literal contained a '.' or an exponent (see
  * Token::u.number.decimalPoint).  The enumerators are defined as false/true.
  */
 enum DecimalPoint { NoDecimal = false, HasDecimal = true };

 /*
  * A single scanned token: its kind, its source extent, and a kind-dependent
  * payload stored in the union |u|.
  *
  * Which member of |u| is meaningful depends on |type|; the accessors below
  * assert the expected kind before reading:
  *   TOK_NAME   -> u.s (op + name)
  *   TOK_STRING -> u.s (op + atom)
  *   TOK_NUMBER -> u.number
  *   TOK_REGEXP -> u.reflags
  * The mutators cannot yet assert the kind (see the FIXME below), so callers
  * are responsible for pairing each setter with the matching |type|.
  */
 struct Token {
     TokenKind           type;           /* char value or above enumerator */
     TokenPos            pos;            /* token position in file */
     union {
         struct {                        /* name or string literal */
             JSOp        op;             /* operator, for minimal parser */
             union {
               private:
                 /* Only Token's own setters/getters may touch the raw pointers. */
                 friend struct Token;
                 PropertyName *name;     /* non-numeric atom */
                 JSAtom       *atom;     /* potentially-numeric atom */
             } n;
         } s;

       private:
         /* The remaining payloads are reachable only through Token's accessors. */
         friend struct Token;
         struct {
             double       value;         /* floating point number */
             DecimalPoint decimalPoint;  /* literal contains . or exponent */
         } number;
         RegExpFlag      reflags;        /* regexp flags, use tokenbuf to access
                                            regexp chars */
     } u;

     /* Mutators */

     /*
      * FIXME: Init type early enough such that all mutators can assert
      *        type-safety.  See bug 697000.
      */

     /* Store a name payload; |op| must be JSOP_NAME and |name| must not be poisoned. */
     void setName(JSOp op, PropertyName *name) {
         JS_ASSERT(op == JSOP_NAME);
         JS_ASSERT(!IsPoisonedPtr(name));
         u.s.op = op;
         u.s.n.name = name;
     }

     /* Store a string payload; |op| must be JSOP_STRING and |atom| must not be poisoned. */
     void setAtom(JSOp op, JSAtom *atom) {
         JS_ASSERT(op == JSOP_STRING);
         JS_ASSERT(!IsPoisonedPtr(atom));
         u.s.op = op;
         u.s.n.atom = atom;
     }

     /* Store regexp flags; |flags| must contain no bits outside AllFlags. */
     void setRegExpFlags(js::RegExpFlag flags) {
         JS_ASSERT((flags & AllFlags) == flags);
         u.reflags = flags;
     }

     /* Store a numeric payload together with its decimal-point marker. */
     void setNumber(double n, DecimalPoint decimalPoint) {
         u.number.value = n;
         u.number.decimalPoint = decimalPoint;
     }

     /* Type-safe accessors */

     /* The name payload; only valid when type == TOK_NAME (asserted). */
     PropertyName *name() const {
         JS_ASSERT(type == TOK_NAME);
         return u.s.n.name->asPropertyName(); /* poor-man's type verification */
     }

     /* The string payload; only valid when type == TOK_STRING (asserted). */
     JSAtom *atom() const {
         JS_ASSERT(type == TOK_STRING);
         return u.s.n.atom;
     }

     /* The regexp flags; only valid when type == TOK_REGEXP (asserted). */
     js::RegExpFlag regExpFlags() const {
         JS_ASSERT(type == TOK_REGEXP);
         JS_ASSERT((u.reflags & AllFlags) == u.reflags);
         return u.reflags;
     }

     /* The numeric value; only valid when type == TOK_NUMBER (asserted). */
     double number() const {
         JS_ASSERT(type == TOK_NUMBER);
         return u.number.value;
     }

     /* The decimal-point marker; only valid when type == TOK_NUMBER (asserted). */
     DecimalPoint decimalPoint() const {
         JS_ASSERT(type == TOK_NUMBER);
         return u.number.decimalPoint;
     }
 };

 /*
  * Shorthand member name: |tok.t_op| expands to |tok.u.s.op|.  Being a macro,
  * it rewrites every identifier spelled |t_op| in any file that includes this
  * header, regardless of namespace or scope.
  */
 #define t_op            u.s.op

 /*
  * Bit flags stored in TokenStream::flags.  Each enumerator is a distinct
  * single bit so they can be OR-ed together; note that 0x01 is not assigned
  * in this enum.
  */
 enum TokenStreamFlags
 {
     TSF_EOF = 0x02,             /* hit end of file */
     TSF_EOL = 0x04,             /* an EOL was hit in whitespace or a multi-line comment */
     TSF_OPERAND = 0x08,         /* looking for operand, not operator */
     TSF_UNEXPECTED_EOF = 0x10,  /* unexpected end of input, i.e. TOK_EOF not at top-level. */
     TSF_KEYWORD_IS_NAME = 0x20, /* Ignore keywords and return TOK_NAME instead to the parser. */
     TSF_DIRTYLINE = 0x40,       /* non-whitespace since start of line */
     TSF_OCTAL_CHAR = 0x80,      /* observed an octal character escape */
     TSF_HAD_ERROR = 0x100,      /* returned TOK_ERROR from getToken */

     /*
      * To handle the hard case of contiguous HTML comments, we want to clear the
      * TSF_DIRTYLINE flag at the end of each such comment.  But we'd rather not
      * scan for --> within every //-style comment unless we have to.  So we set
      * TSF_IN_HTML_COMMENT when a <!-- is scanned as an HTML begin-comment, and
      * clear it (and TSF_DIRTYLINE) when we scan --> either on a clean line, or
      * only if (ts->flags & TSF_IN_HTML_COMMENT), in a //-style comment.
      *
      * (Earlier revisions of this comment said TSF_DIRTYINPUT; no flag of that
      * name is defined in this enum, and TSF_DIRTYLINE is the flag that tracks
      * non-whitespace since the start of the line.)
      *
      * This still works as before given a malformed comment hiding hack such as:
      *
      *    <script>
      *      <!-- comment hiding hack #1
      *      code goes here
      *      // --> oops, markup for script-unaware browsers goes here!
      *    </script>
      *
      * It does not cope with malformed comment hiding hacks where --> is hidden
      * by C-style comments, or on a dirty line.  Such cases are already broken.
      */
     TSF_IN_HTML_COMMENT = 0x200
 };

 /*
  * State for one compile-time error report.  The constructor records the
  * context, starts with no message, marks the arguments as
  * ArgumentsAreUnicode, and zero-fills |report| with PodZero.
  *
  * NOTE(review): the destructor and throwError() are defined elsewhere; who
  * owns |message| cannot be determined from this header.
  */
 struct CompileError {
     JSContext *cx;                      /* context the error belongs to */
     JSErrorReport report;               /* zero-filled by the constructor */
     char *message;                      /* NULL until a message is attached */
     ErrorArgumentsType argumentsType;   /* initially ArgumentsAreUnicode */
     CompileError(JSContext *cx)
       : cx(cx), message(NULL), argumentsType(ArgumentsAreUnicode)
     {
         mozilla::PodZero(&report);
     }
     ~CompileError();
     void throwError();
 };

 /* Report whether |cx| has the JSOPTION_STRICT_MODE option set. */
 inline bool
 StrictModeFromContext(JSContext *cx)
 {
     const bool strictOptionSet = cx->hasOption(JSOPTION_STRICT_MODE);
     return strictOptionSet;
 }

 // Ideally, tokenizing would be entirely independent of context.  But the
 // strict mode flag, which is in SharedContext, affects tokenizing, and
 // TokenStream needs to see it.
 //
 // This class is a tiny back-channel from TokenStream to the strict mode flag
 // that avoids exposing the rest of SharedContext to TokenStream.
 //
 /*
  * Abstract callback through which TokenStream asks whether the code being
  * scanned is in strict mode.  Implementations override strictMode().
  */
 class StrictModeGetter {
   public:
     /* Return true iff strict-mode rules currently apply. */
     virtual bool strictMode() = 0;

   protected:
     /*
      * Non-virtual and protected: this interface is only ever borrowed through
      * a pointer, never owned, so deleting an implementation through a
      * StrictModeGetter* (which would be undefined behaviour without a virtual
      * destructor) is rejected at compile time.
      */
     ~StrictModeGetter() {}
 };

 // TokenStream is the lexical scanner for Javascript source text.
 //
 // It takes a buffer of jschars and linearly scans it into |Token|s.
 // Internally the class uses a four element circular buffer |tokens| of
 // |Token|s. As an index for |tokens|, the member |cursor| points to the
 // current token.
 // Calls to getToken() increase |cursor| by one and return the new current
 // token. If a TokenStream was just created, the current token is initialized
 // with random data (i.e. not initialized). It is therefore important that
 // either of the first four member functions listed below is called first.
 // The circular buffer lets us go back up to two tokens from the last
 // scanned token. Internally, the relative number of backward steps that were
 // taken (via ungetToken()) after the last token was scanned is stored in
 // |lookahead|.
 //
 // The following table lists in which situations it is safe to call each listed
 // function. No checks are made by the functions in non-debug builds.
 //
 // Function Name     | Precondition; changes to |lookahead|
 // ------------------+---------------------------------------------------------
 // getToken          | none; if |lookahead > 0| then |lookahead--|
 // peekToken         | none; none
 // peekTokenSameLine | none; none
 // matchToken        | none; if |lookahead > 0| and the match succeeds then
 //                   |       |lookahead--|
 // consumeKnownToken | none; if |lookahead > 0| then |lookahead--|
 // ungetToken        | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++|
 //
 // The behavior of the token scanning process (see getTokenInternal()) can be
 // modified by calling one of the first four above listed member functions with
 // an optional argument of type TokenStreamFlags. The two flags that do
 // influence the scanning process are TSF_OPERAND and TSF_KEYWORD_IS_NAME.
 // However, they will be ignored unless |lookahead == 0| holds.
 // Due to constraints of the grammar, this turns out not to be a problem in
 // practice. See the mozilla.dev.tech.js-engine.internals thread entitled 'Bug
 // in the scanner?' for more details (https://groups.google.com/forum/?
 // fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E).
 //
 // The methods seek() and tell() allow rescanning from a previously visited
 // location of the buffer.
 class MOZ_STACK_CLASS TokenStream
 {
     /* Unicode separators that are treated as line terminators, in addition to \n, \r */
     enum {
         LINE_SEPARATOR = 0x2028,
         PARA_SEPARATOR = 0x2029
     };

     /*
      * Sizing of the circular token buffer.  Because ntokens is a power of two,
      * (i & ntokensMask) wraps an index into [0, ntokens) without a division;
      * getToken()/ungetToken() rely on this when stepping |cursor|.
      */
     static const size_t ntokens = 4;                /* 1 current + 2 lookahead, rounded
                                                        to power of 2 to avoid divmod by 3 */
     static const unsigned maxLookahead = 2;         /* most tokens that may be pushed back */
     static const unsigned ntokensMask = ntokens - 1;

   public:
     /* Buffer type used for the characters of the token being scanned (see |tokenbuf|). */
     typedef Vector<jschar, 32> CharBuffer;

     /*
      * |base| and |length| describe the jschar buffer to scan.  |smg| may be
      * null: strictMode() checks the pointer before using it.
      * NOTE(review): |keepAtoms| is presumably required so atoms referenced by
      * tokens stay alive while the stream exists (compare Position's
      * constructor) -- confirm against the definition.
      */
     TokenStream(JSContext *cx, const CompileOptions &options,
                 const jschar *base, size_t length, StrictModeGetter *smg,
                 AutoKeepAtoms& keepAtoms);

     ~TokenStream();

     /* Accessors. */
     JSContext *getContext() const { return cx; }
     bool onCurrentLine(const TokenPos &pos) const { return srcCoords.isOnThisLine(pos.end, lineno); }
     const Token &currentToken() const { return tokens[cursor]; }
     bool isCurrentTokenType(TokenKind type) const {
         return currentToken().type == type;
     }
     bool isCurrentTokenType(TokenKind type1, TokenKind type2) const {
         TokenKind type = currentToken().type;
         return type == type1 || type == type2;
     }
     const CharBuffer &getTokenbuf() const { return tokenbuf; }
     const char *getFilename() const { return filename; }
     unsigned getLineno() const { return lineno; }
     unsigned getColumn() const { return userbuf.addressOfNextRawChar() - linebase - 1; }
     JSVersion versionNumber() const { return VersionNumber(version); }
     JSVersion versionWithFlags() const { return version; }
     bool hadError() const { return !!(flags & TSF_HAD_ERROR); }

     bool isCurrentTokenEquality() const {
         return TokenKindIsEquality(currentToken().type);
     }

     bool isCurrentTokenRelational() const {
         return TokenKindIsRelational(currentToken().type);
     }

     bool isCurrentTokenShift() const {
         return TokenKindIsShift(currentToken().type);
     }

     bool isCurrentTokenAssignment() const {
         return TokenKindIsAssignment(currentToken().type);
     }

     /* Flag methods. */

     /* Set (default) or clear TSF_UNEXPECTED_EOF. */
     void setUnexpectedEOF(bool enabled = true) {
         setFlag(enabled, TSF_UNEXPECTED_EOF);
     }

     bool isUnexpectedEOF() const {
         return (flags & TSF_UNEXPECTED_EOF) != 0;
     }

     bool isEOF() const {
         return (flags & TSF_EOF) != 0;
     }

     bool sawOctalEscape() const {
         return (flags & TSF_OCTAL_CHAR) != 0;
     }

     // TokenStream-specific error reporters.  Each takes an error number
     // followed by a variable argument list.
     bool reportError(unsigned errorNumber, ...);
     bool reportWarning(unsigned errorNumber, ...);

     // General-purpose error reporters.  You should avoid calling these
     // directly, and instead use the more succinct alternatives (e.g.
     // reportError()) in TokenStream, Parser, and BytecodeEmitter.
     // These variants take an explicit buffer |offset| and an
     // already-started va_list instead of "...".
     bool reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
                                     va_list args);
     bool reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
                                        va_list args);
     bool reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber,
                                           va_list args);

     // asm.js reporter
     void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...);

   private:
     // These are private because they should only be called by the tokenizer
     // while tokenizing not by, for example, BytecodeEmitter.
     bool reportStrictModeError(unsigned errorNumber, ...);

     // False when no StrictModeGetter was supplied; otherwise whatever the
     // getter reports.
     bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); }

     void onError();
     static JSAtom *atomize(JSContext *cx, CharBuffer &cb);
     bool putIdentInTokenbuf(const jschar *identStart);

     /*
      * Scoped flag toggle: the constructor ORs |withFlags| into the owning
      * TokenStream's flags and the destructor clears those same bits again.
      *
      * The destructor clears the bits unconditionally, i.e. it does not restore
      * whatever state a bit had before the Flagger was created.
      */
     class Flagger {
         TokenStream * const parent;
         unsigned       flags;

       public:
         Flagger(TokenStream *parent, unsigned withFlags)
           : parent(parent),
             flags(withFlags)
         {
             parent->flags |= withFlags;
         }

         ~Flagger() {
             parent->flags = parent->flags & ~flags;
         }
     };
     friend class Flagger;

     /* Set |flag| in |flags| when |enabled| is true, clear it otherwise. */
     void setFlag(bool enabled, TokenStreamFlags flag) {
         if (!enabled) {
             flags &= ~flag;
             return;
         }
         flags |= flag;
     }

   public:
     /*
      * Advance to the next token, make it current, and return its kind.
      *
      * When tokens have been pushed back with ungetToken(), the next one is
      * replayed from the circular buffer; only when nothing is pending is the
      * source actually scanned.
      */
     TokenKind getToken() {
         if (lookahead == 0)
             return getTokenInternal();

         /* Replay a pushed-back token resulting from mismatching lookahead. */
         lookahead--;
         cursor = (cursor + 1) & ntokensMask;
         const TokenKind replayed = currentToken().type;
         JS_ASSERT(replayed != TOK_EOL);
         return replayed;
     }

     /* As above, with |withFlags| enabled for the duration of the call. */
     TokenKind getToken(unsigned withFlags) {
         Flagger scopedFlags(this, withFlags);
         return getToken();
     }

     /*
      * Push the current token back so that the next getToken() returns it
      * again.  At most maxLookahead tokens may be pending at once (asserted).
      */
     void ungetToken() {
         JS_ASSERT(lookahead < maxLookahead);
         /* Step the cursor one slot backwards, wrapping within the buffer. */
         cursor--;
         cursor &= ntokensMask;
         lookahead++;
     }

     /*
      * Return the kind of the next token without consuming it.  If nothing is
      * pending, one token is scanned and immediately pushed back.
      */
     TokenKind peekToken() {
         if (lookahead == 0) {
             const TokenKind scanned = getTokenInternal();
             ungetToken();
             return scanned;
         }

         const unsigned nextSlot = (cursor + 1) & ntokensMask;
         return tokens[nextSlot].type;
     }

     /* As above, with |withFlags| enabled for the duration of the call. */
     TokenKind peekToken(unsigned withFlags) {
         Flagger scopedFlags(this, withFlags);
         return peekToken();
     }

     /*
      * Like peekToken(), but yields TOK_EOL when a line break separates the
      * current token from the next one.  |withFlags| only takes effect when a
      * token actually has to be scanned (i.e. nothing is pending).
      */
     TokenKind peekTokenSameLine(unsigned withFlags = 0) {
         const Token &current = currentToken();
         if (!onCurrentLine(current.pos))
             return TOK_EOL;

         if (lookahead != 0) {
             const unsigned nextSlot = (cursor + 1) & ntokensMask;
             return tokens[nextSlot].type;
         }

         /*
          * This is the only place TOK_EOL is produced.  No token with TOK_EOL
          * is created, just a TOK_EOL TokenKind is returned.
          *
          * TSF_EOL is cleared before scanning so that, afterwards, it tells us
          * whether this particular scan crossed a line break; it is cleared
          * again before returning.
          */
         flags &= ~TSF_EOL;
         const TokenKind scanned = getToken(withFlags);
         const bool crossedEOL = (flags & TSF_EOL) != 0;
         if (crossedEOL)
             flags &= ~TSF_EOL;
         ungetToken();
         return crossedEOL ? TOK_EOL : scanned;
     }

     /*
      * Consume the next token only if its kind is |tt|.  Returns true when it
      * was consumed; otherwise the token is pushed back and false is returned.
      */
     bool matchToken(TokenKind tt) {
         if (getToken() != tt) {
             ungetToken();
             return false;
         }
         return true;
     }

     /* As above, with |withFlags| enabled for the duration of the call. */
     bool matchToken(TokenKind tt, unsigned withFlags) {
         Flagger scopedFlags(this, withFlags);
         return matchToken(tt);
     }

     /*
      * Consume the next token, which the caller already knows to be of kind
      * |tt|.  The matchToken() call is wrapped in JS_ALWAYS_TRUE.
      * NOTE(review): JS_ALWAYS_TRUE presumably asserts the result in debug
      * builds while still evaluating the call in release builds -- confirm
      * against its definition.
      */
     void consumeKnownToken(TokenKind tt) {
         JS_ALWAYS_TRUE(matchToken(tt));
     }

     /*
      * Snapshot type used with tell()/seek() and positionToOffset().  Only
      * TokenStream (a friend) can read or write the fields.  Copy construction
      * is disabled, and the constructor leaves every field uninitialized.
      */
     class MOZ_STACK_CLASS Position {
       public:
         /*
          * The Token fields may contain pointers to atoms, so for correct
          * rooting we must ensure collection of atoms is disabled while objects
          * of this class are live.  Do this by requiring a dummy AutoKeepAtoms
          * reference in the constructor.
          *
          * This class is explicitly ignored by the analysis, so don't add any
          * more pointers to GC things here!
          */
         Position(AutoKeepAtoms&) { }
       private:
         Position(const Position&) MOZ_DELETE;
         friend class TokenStream;
         /*
          * NOTE(review): the fields below mirror like-named TokenStream state;
          * tell()/seek() are defined elsewhere, so exactly what each one
          * captures should be confirmed there.
          */
         const jschar *buf;              /* subtracted from userbuf.base() by positionToOffset() */
         unsigned flags;
         unsigned lineno;
         const jschar *linebase;
         const jschar *prevLinebase;
         Token currentToken;
         unsigned lookahead;
         Token lookaheadTokens[maxLookahead];
     };

     /*
      * Repositioning interface.  Per the class comment, tell() and seek()
      * allow rescanning from a previously visited location of the buffer.
      * NOTE(review): all five functions are defined elsewhere; the exact
      * semantics of advance(), the two-argument seek() and
      * positionAfterLastFunctionKeyword() should be confirmed there.
      */
     void advance(size_t position);
     void tell(Position *);
     void seek(const Position &pos);
     void seek(const Position &pos, const TokenStream &other);
     void positionAfterLastFunctionKeyword(Position &pos);

     size_t positionToOffset(const Position &pos) const {
         return pos.buf - userbuf.base();
     }

     /* True iff a source map string is currently held by this stream. */
     bool hasSourceMap() const {
         if (sourceMap == NULL)
             return false;
         return true;
     }

     /*
      * Give up responsibility for managing the sourceMap filename's memory:
      * the pointer is handed to the caller and this stream forgets it.  A
      * source map must be present (asserted).
      */
     jschar *releaseSourceMap() {
         JS_ASSERT(hasSourceMap());
         jschar *released = sourceMap;
         sourceMap = NULL;
         return released;
     }

     /*
      * Classify the name at s[0:length] (|length| jschars starting at |s|).
      *
      * If the name at s[0:length] is not a keyword in this version, return
      * true with *ttp and *topp unchanged.
      *
      * If it is a reserved word in this version and strictness mode, and thus
      * can't be present in correct code, report a SyntaxError and return false.
      *
      * If it is a keyword, like "if", the behavior depends on ttp/topp. If ttp
      * and topp are null, report a SyntaxError ("if is a reserved identifier")
      * and return false. If ttp and topp are non-null, return true with the
      * keyword's TokenKind in *ttp and its JSOp in *topp.
      *
      * ttp and topp must be either both null or both non-null.
      *
      * In short, a false return always means an error has been reported.
      */
     bool checkForKeyword(const jschar *s, size_t length, TokenKind *ttp, JSOp *topp);

     // This class maps a userbuf offset (which is 0-indexed) to a line number
     // (which is 1-indexed) and a column index (which is 0-indexed).
     class SourceCoords
     {
         // For a given buffer holding source code, |lineStartOffsets_| has one
         // element per line of source code, plus one sentinel element.  Each
         // non-sentinel element holds the buffer offset for the start of the
         // corresponding line of source code.  For this example script:
         //
         // 1  // xyz            [line starts at offset 0]
         // 2  var x;            [line starts at offset 7]
         // 3                    [line starts at offset 14]
         // 4  var y;            [line starts at offset 15]
         //
         // |lineStartOffsets_| is:
         //
         //   [0, 7, 14, 15, MAX_PTR]
         //
         // To convert a "line number" to a "line index" (i.e. an index into
         // |lineStartOffsets_|), subtract |initialLineNum_|.  E.g. line 3's
         // line index is (3 - initialLineNum_), which is 2.  Therefore
         // lineStartOffsets_[2] holds the buffer offset for the start of line 3,
         // which is 14.  (Note that |initialLineNum_| is often 1, but not
         // always.)
         //
         // The first element is always 0, and the last element is always the
         // MAX_PTR sentinel.
         //
         // offset-to-line/column lookups are O(log n) in the worst case (binary
         // search), but in practice they're heavily clustered and we do better
         // than that by using the previous lookup's result (lastLineIndex_) as
         // a starting point.
         //
         // Checking if an offset lies within a particular line number
         // (isOnThisLine()) is O(1).
         //
         Vector<uint32_t, 128> lineStartOffsets_;
         uint32_t            initialLineNum_;

         // This is mutable because it's modified on every search, but that fact
         // isn't visible outside this class.
         mutable uint32_t    lastLineIndex_;

         uint32_t lineIndexOf(uint32_t offset) const;

         // Sentinel stored after the last real line start.  Being the largest
         // uint32_t, it makes the upper-bound test in isOnThisLine() succeed
         // for any smaller offset on the final line.
         static const uint32_t MAX_PTR = UINT32_MAX;

         // Conversions between 0-based line indexes and line numbers, which are
         // offset by |initialLineNum_|.
         uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; }
         uint32_t lineNumToIndex(uint32_t lineNum)   const { return lineNum   - initialLineNum_; }

       public:
         // NOTE(review): |ln| is presumably the number of the first line
         // (initialLineNum_) -- confirm against the definition.
         SourceCoords(JSContext *cx, uint32_t ln);

         // NOTE(review): add() and fill() are defined elsewhere; presumably
         // add() records where line |lineNum| starts and fill() copies line
         // starts from |other| -- confirm there.
         void add(uint32_t lineNum, uint32_t lineStartOffset);
         void fill(const SourceCoords &other);

         // True iff |offset| lies in [start of |lineNum|, start of the next
         // line).  Two table reads, no searching.  |lineNum| must refer to a
         // line already present in the table (asserted).
         bool isOnThisLine(uint32_t offset, uint32_t lineNum) const {
             uint32_t lineIndex = lineNumToIndex(lineNum);
             JS_ASSERT(lineIndex + 1 < lineStartOffsets_.length());  // +1 due to sentinel
             return lineStartOffsets_[lineIndex] <= offset &&
                    offset < lineStartOffsets_[lineIndex + 1];
         }

         // Offset-to-coordinate lookups; see the complexity note above.
         uint32_t lineNum(uint32_t offset) const;
         uint32_t columnIndex(uint32_t offset) const;
         void lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum, uint32_t *columnIndex) const;
     };

     // Line-start table for this stream; consulted by onCurrentLine().
     SourceCoords srcCoords;

   private:
     /*
      * This is the low-level interface to the JS source code buffer.  It just
      * gets raw chars, basically.  TokenStreams functions are layered on top
      * and do some extra stuff like converting all EOL sequences to '\n',
      * tracking the line number, and setting the TSF_EOF flag.  (The "raw" in
      * "raw chars" refers to the lack of EOL sequence normalization.)
      */
     class TokenBuf {
       public:
         TokenBuf(JSContext *cx, const jschar *buf, size_t length)
           : base_(buf), limit_(buf + length), ptr(buf),
             skipBase(cx, &base_), skipLimit(cx, &limit_), skipPtr(cx, &ptr)
         { }

         bool hasRawChars() const {
             return ptr < limit_;
         }

         bool atStart() const {
             return ptr == base_;
         }

         const jschar *base() const {
             return base_;
         }

         const jschar *limit() const {
             return limit_;
         }

         jschar getRawChar() {
             return *ptr++;      /* this will NULL-crash if poisoned */
         }

         jschar peekRawChar() const {
             return *ptr;        /* this will NULL-crash if poisoned */
         }

         bool matchRawChar(jschar c) {
             if (*ptr == c) {    /* this will NULL-crash if poisoned */
                 ptr++;
                 return true;
             }
             return false;
         }

         bool matchRawCharBackwards(jschar c) {
             JS_ASSERT(ptr);     /* make sure haven't been poisoned */
             if (*(ptr - 1) == c) {
                 ptr--;
                 return true;
             }
             return false;
         }

         void ungetRawChar() {
             JS_ASSERT(ptr);     /* make sure haven't been poisoned */
             ptr--;
         }

         const jschar *addressOfNextRawChar(bool allowPoisoned = false) const {
             JS_ASSERT_IF(!allowPoisoned, ptr);     /* make sure haven't been poisoned */
             return ptr;
         }

         /* Use this with caution! */
         void setAddressOfNextRawChar(const jschar *a, bool allowPoisoned = false) {
             JS_ASSERT_IF(!allowPoisoned, a);
             ptr = a;
         }

 #ifdef DEBUG
         /* Poison the TokenBuf so it cannot be accessed again. */
         void poison() {
             ptr = NULL;
         }
 #endif

         static bool isRawEOLChar(int32_t c) {
             return (c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR);
         }

         // Finds the next EOL, but stops once 'max' jschars have been scanned
         // (*including* the starting jschar).
         const jschar *findEOLMax(const jschar *p, size_t max);

       private:
         const jschar *base_;            /* base of buffer */
         const jschar *limit_;           /* limit for quick bounds check */
         const jschar *ptr;              /* next char to get */

         // We are not yet moving strings
         SkipRoot skipBase, skipLimit, skipPtr;
     };

     /*
      * Scanner internals, all defined out of line.  getToken(), peekToken()
      * and friends fall back to getTokenInternal() when no pushed-back token
      * is pending.
      */
     TokenKind getTokenInternal();     /* doesn't check for pushback or error flag. */

     /* Character-level helpers used by the inline wrappers below. */
     int32_t getChar();
     int32_t getCharIgnoreEOL();
     void ungetChar(int32_t c);
     void ungetCharIgnoreEOL(int32_t c);
     Token *newToken(ptrdiff_t adjust);
     bool peekUnicodeEscape(int32_t *c);
     bool matchUnicodeEscapeIdStart(int32_t *c);
     bool matchUnicodeEscapeIdent(int32_t *c);
     bool peekChars(int n, jschar *cp);
     bool getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated);

     // Consume the next raw char if it equals |expect|; returns whether it did.
     // At end of input nothing is read and false is returned.
     // |expect| cannot be an EOL char.
     bool matchChar(int32_t expect) {
         MOZ_ASSERT(!TokenBuf::isRawEOLChar(expect));
         if (!JS_LIKELY(userbuf.hasRawChars()))
             return false;
         return userbuf.matchRawChar(expect);
     }

     void consumeKnownChar(int32_t expect) {
         mozilla::DebugOnly<int32_t> c = getChar();
         JS_ASSERT(c == expect);
     }

     // Return the next char without consuming it, by reading it and handing
     // it straight back.
     int32_t peekChar() {
         const int32_t next = getChar();
         ungetChar(next);
         return next;
     }

     // Read and discard |n| chars; does nothing when |n| is zero or negative.
     void skipChars(int n) {
         for (int remaining = n; remaining > 0; remaining--)
             getChar();
     }

     /* End-of-line bookkeeping helpers, defined out of line. */
     void updateLineInfoForEOL();
     void updateFlagsForEOL();

     Token               tokens[ntokens];/* circular token buffer */
     unsigned            cursor;         /* index into tokens[] of the current token */
     unsigned            lookahead;      /* count of lookahead tokens */
     unsigned            lineno;         /* current line number */
     unsigned            flags;          /* flags -- see above */
     const jschar        *linebase;      /* start of current line;  points into userbuf */
     const jschar        *prevLinebase;  /* start of previous line;  NULL if on the first line */
     TokenBuf            userbuf;        /* user input buffer */
     const char          *filename;      /* input filename or null */
     jschar              *sourceMap;     /* source map's filename or null */
     void                *listenerTSData;/* listener data for this TokenStream */
     CharBuffer          tokenbuf;       /* current token string buffer */
     int8_t              oneCharTokens[128];  /* table of one-char tokens */
     bool                maybeEOL[256];       /* probabilistic EOL lookup table */
     bool                maybeStrSpecial[256];/* speeds up string scanning */
     JSVersion           version;        /* (i.e. to identify keywords) */
     JSContext           *const cx;
     JSPrincipals        *const originPrincipals;
     StrictModeGetter    *strictModeGetter; /* used to test for strict mode; may be null */
     Position            lastFunctionKeyword; /* used as a starting point for reparsing strict functions */

     /*
      * The tokens array stores pointers to JSAtoms. These are rooted by the
      * atoms table using AutoKeepAtoms in the Parser. This SkipRoot tells the
      * exact rooting analysis to ignore the atoms in the tokens array.
      */
     SkipRoot            tokenSkip;

     // Bug 846011
     SkipRoot            linebaseSkip;
     SkipRoot            prevLinebaseSkip;
 };

 /*
  * Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error
  * message have const jschar* type, not const char*.
  *
  * Although this definition sits inside namespace js::frontend, macros are
  * not scoped by namespaces: JSREPORT_UC is visible to everything that
  * includes this header.
  */
 #define JSREPORT_UC 0x100

 } /* namespace frontend */
 } /* namespace js */

 /*
  * NOTE(review): defined elsewhere; the signature mirrors fgets() except for
  * the int return, whose meaning should be confirmed against the definition.
  */
 extern JS_FRIEND_API(int)
 js_fgets(char *buf, int size, FILE *file);

 #ifdef DEBUG
 /* Debug-only helper returning a printable string for |tt|; declared at global scope. */
 extern const char *
 TokenKindToString(js::frontend::TokenKind tt);
 #endif

 #endif /* frontend_TokenStream_h */