| /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
| * vim: set ts=8 sts=4 et sw=4 tw=99: |
| * This Source Code Form is subject to the terms of the Mozilla Public |
| * License, v. 2.0. If a copy of the MPL was not distributed with this |
| * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
| |
| // JS lexical scanner. |
| |
| #include "frontend/TokenStream.h" |
| |
| #include "mozilla/IntegerTypeTraits.h" |
| #include "mozilla/PodOperations.h" |
| #include "mozilla/UniquePtr.h" |
| |
| #include <ctype.h> |
| #include <stdarg.h> |
| #include <stdio.h> |
| #include <string.h> |
| |
| #include "jsatom.h" |
| #include "jscntxt.h" |
| #include "jscompartment.h" |
| #include "jsexn.h" |
| #include "jsnum.h" |
| |
| #include "frontend/BytecodeCompiler.h" |
| #include "js/CharacterEncoding.h" |
| #include "vm/HelperThreads.h" |
| #include "vm/Keywords.h" |
| #include "vm/StringBuffer.h" |
| |
| using namespace js; |
| using namespace js::frontend; |
| using namespace js::unicode; |
| |
| using mozilla::Maybe; |
| using mozilla::PodAssign; |
| using mozilla::PodCopy; |
| using mozilla::PodZero; |
| using mozilla::UniquePtr; |
| |
| struct KeywordInfo { |
| const char* chars; // C string with keyword text |
| TokenKind tokentype; |
| }; |
| |
| static const KeywordInfo keywords[] = { |
| #define KEYWORD_INFO(keyword, name, type) \ |
| {js_##keyword##_str, type}, |
| FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_INFO) |
| #undef KEYWORD_INFO |
| }; |
| |
| // Returns a KeywordInfo for the specified characters, or nullptr if the string |
| // is not a keyword. |
| template <typename CharT> |
| static const KeywordInfo* |
| FindKeyword(const CharT* s, size_t length) |
| { |
| MOZ_ASSERT(length != 0); |
| |
| size_t i; |
| const KeywordInfo* kw; |
| const char* chars; |
| |
| #define JSKW_LENGTH() length |
| #define JSKW_AT(column) s[column] |
| #define JSKW_GOT_MATCH(index) i = (index); goto got_match; |
| #define JSKW_TEST_GUESS(index) i = (index); goto test_guess; |
| #define JSKW_NO_MATCH() goto no_match; |
| #include "jsautokw.h" |
| #undef JSKW_NO_MATCH |
| #undef JSKW_TEST_GUESS |
| #undef JSKW_GOT_MATCH |
| #undef JSKW_AT |
| #undef JSKW_LENGTH |
| |
| got_match: |
| return &keywords[i]; |
| |
| test_guess: |
| kw = &keywords[i]; |
| chars = kw->chars; |
| do { |
| if (*s++ != (unsigned char)(*chars++)) |
| goto no_match; |
| } while (--length != 0); |
| return kw; |
| |
| no_match: |
| return nullptr; |
| } |
| |
| static const KeywordInfo* |
| FindKeyword(JSLinearString* str) |
| { |
| JS::AutoCheckCannotGC nogc; |
| return str->hasLatin1Chars() |
| ? FindKeyword(str->latin1Chars(nogc), str->length()) |
| : FindKeyword(str->twoByteChars(nogc), str->length()); |
| } |
| |
// Returns true iff the |length| characters at |chars| form an identifier: the
// sequence is non-empty, its first character satisfies IsIdentifierStart and
// every following character satisfies IsIdentifierPart.
template <typename CharT>
static bool
IsIdentifier(const CharT* chars, size_t length)
{
    if (length == 0 || !IsIdentifierStart(chars[0]))
        return false;

    for (size_t i = 1; i < length; i++) {
        if (!IsIdentifierPart(chars[i]))
            return false;
    }

    return true;
}
| |
| bool |
| frontend::IsIdentifier(JSLinearString* str) |
| { |
| JS::AutoCheckCannotGC nogc; |
| return str->hasLatin1Chars() |
| ? ::IsIdentifier(str->latin1Chars(nogc), str->length()) |
| : ::IsIdentifier(str->twoByteChars(nogc), str->length()); |
| } |
| |
| bool |
| frontend::IsIdentifier(const char16_t* chars, size_t length) |
| { |
| return ::IsIdentifier(chars, length); |
| } |
| |
| bool |
| frontend::IsKeyword(JSLinearString* str) |
| { |
| return FindKeyword(str) != nullptr; |
| } |
| |
| TokenStream::SourceCoords::SourceCoords(ExclusiveContext* cx, uint32_t ln) |
| : lineStartOffsets_(cx), initialLineNum_(ln), lastLineIndex_(0) |
| { |
| // This is actually necessary! Removing it causes compile errors on |
| // GCC and clang. You could try declaring this: |
| // |
| // const uint32_t TokenStream::SourceCoords::MAX_PTR; |
| // |
| // which fixes the GCC/clang error, but causes bustage on Windows. Sigh. |
| // |
| uint32_t maxPtr = MAX_PTR; |
| |
| // The first line begins at buffer offset 0. MAX_PTR is the sentinel. The |
| // appends cannot fail because |lineStartOffsets_| has statically-allocated |
| // elements. |
| MOZ_ASSERT(lineStartOffsets_.capacity() >= 2); |
| MOZ_ALWAYS_TRUE(lineStartOffsets_.reserve(2)); |
| lineStartOffsets_.infallibleAppend(0); |
| lineStartOffsets_.infallibleAppend(maxPtr); |
| } |
| |
| MOZ_ALWAYS_INLINE bool |
| TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset) |
| { |
| uint32_t lineIndex = lineNumToIndex(lineNum); |
| uint32_t sentinelIndex = lineStartOffsets_.length() - 1; |
| |
| MOZ_ASSERT(lineStartOffsets_[0] == 0 && lineStartOffsets_[sentinelIndex] == MAX_PTR); |
| |
| if (lineIndex == sentinelIndex) { |
| // We haven't seen this newline before. Update lineStartOffsets_ |
| // only if lineStartOffsets_.append succeeds, to keep sentinel. |
| // Otherwise return false to tell TokenStream about OOM. |
| uint32_t maxPtr = MAX_PTR; |
| if (!lineStartOffsets_.append(maxPtr)) |
| return false; |
| |
| lineStartOffsets_[lineIndex] = lineStartOffset; |
| } else { |
| // We have seen this newline before (and ungot it). Do nothing (other |
| // than checking it hasn't mysteriously changed). |
| // This path can be executed after hitting OOM, so check lineIndex. |
| MOZ_ASSERT_IF(lineIndex < sentinelIndex, lineStartOffsets_[lineIndex] == lineStartOffset); |
| } |
| return true; |
| } |
| |
| MOZ_ALWAYS_INLINE bool |
| TokenStream::SourceCoords::fill(const TokenStream::SourceCoords& other) |
| { |
| MOZ_ASSERT(lineStartOffsets_.back() == MAX_PTR); |
| MOZ_ASSERT(other.lineStartOffsets_.back() == MAX_PTR); |
| |
| if (lineStartOffsets_.length() >= other.lineStartOffsets_.length()) |
| return true; |
| |
| uint32_t sentinelIndex = lineStartOffsets_.length() - 1; |
| lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex]; |
| |
| for (size_t i = sentinelIndex + 1; i < other.lineStartOffsets_.length(); i++) { |
| if (!lineStartOffsets_.append(other.lineStartOffsets_[i])) |
| return false; |
| } |
| return true; |
| } |
| |
| MOZ_ALWAYS_INLINE uint32_t |
| TokenStream::SourceCoords::lineIndexOf(uint32_t offset) const |
| { |
| uint32_t iMin, iMax, iMid; |
| |
| if (lineStartOffsets_[lastLineIndex_] <= offset) { |
| // If we reach here, offset is on a line the same as or higher than |
| // last time. Check first for the +0, +1, +2 cases, because they |
| // typically cover 85--98% of cases. |
| if (offset < lineStartOffsets_[lastLineIndex_ + 1]) |
| return lastLineIndex_; // lineIndex is same as last time |
| |
| // If we reach here, there must be at least one more entry (plus the |
| // sentinel). Try it. |
| lastLineIndex_++; |
| if (offset < lineStartOffsets_[lastLineIndex_ + 1]) |
| return lastLineIndex_; // lineIndex is one higher than last time |
| |
| // The same logic applies here. |
| lastLineIndex_++; |
| if (offset < lineStartOffsets_[lastLineIndex_ + 1]) { |
| return lastLineIndex_; // lineIndex is two higher than last time |
| } |
| |
| // No luck. Oh well, we have a better-than-default starting point for |
| // the binary search. |
| iMin = lastLineIndex_ + 1; |
| MOZ_ASSERT(iMin < lineStartOffsets_.length() - 1); // -1 due to the sentinel |
| |
| } else { |
| iMin = 0; |
| } |
| |
| // This is a binary search with deferred detection of equality, which was |
| // marginally faster in this case than a standard binary search. |
| // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we |
| // want one before that. |
| iMax = lineStartOffsets_.length() - 2; |
| while (iMax > iMin) { |
| iMid = iMin + (iMax - iMin) / 2; |
| if (offset >= lineStartOffsets_[iMid + 1]) |
| iMin = iMid + 1; // offset is above lineStartOffsets_[iMid] |
| else |
| iMax = iMid; // offset is below or within lineStartOffsets_[iMid] |
| } |
| MOZ_ASSERT(iMax == iMin); |
| MOZ_ASSERT(lineStartOffsets_[iMin] <= offset && offset < lineStartOffsets_[iMin + 1]); |
| lastLineIndex_ = iMin; |
| return iMin; |
| } |
| |
| uint32_t |
| TokenStream::SourceCoords::lineNum(uint32_t offset) const |
| { |
| uint32_t lineIndex = lineIndexOf(offset); |
| return lineIndexToNum(lineIndex); |
| } |
| |
| uint32_t |
| TokenStream::SourceCoords::columnIndex(uint32_t offset) const |
| { |
| uint32_t lineIndex = lineIndexOf(offset); |
| uint32_t lineStartOffset = lineStartOffsets_[lineIndex]; |
| MOZ_ASSERT(offset >= lineStartOffset); |
| return offset - lineStartOffset; |
| } |
| |
| void |
| TokenStream::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t* lineNum, |
| uint32_t* columnIndex) const |
| { |
| uint32_t lineIndex = lineIndexOf(offset); |
| *lineNum = lineIndexToNum(lineIndex); |
| uint32_t lineStartOffset = lineStartOffsets_[lineIndex]; |
| MOZ_ASSERT(offset >= lineStartOffset); |
| *columnIndex = offset - lineStartOffset; |
| } |
| |
| #ifdef _MSC_VER |
| #pragma warning(push) |
| #pragma warning(disable:4351) |
| #endif |
| |
| TokenStream::TokenStream(ExclusiveContext* cx, const ReadOnlyCompileOptions& options, |
| const char16_t* base, size_t length, StrictModeGetter* smg) |
| : srcCoords(cx, options.lineno), |
| options_(options), |
| tokens(), |
| cursor(), |
| lookahead(), |
| lineno(options.lineno), |
| flags(), |
| linebase(0), |
| prevLinebase(size_t(-1)), |
| userbuf(cx, base, length, options.column), |
| filename(options.filename()), |
| displayURL_(nullptr), |
| sourceMapURL_(nullptr), |
| tokenbuf(cx), |
| cx(cx), |
| mutedErrors(options.mutedErrors()), |
| strictModeGetter(smg) |
| { |
| // Nb: the following tables could be static, but initializing them here is |
| // much easier. Don't worry, the time to initialize them for each |
| // TokenStream is trivial. See bug 639420. |
| |
| // See Parser::assignExpr() for an explanation of isExprEnding[]. |
| memset(isExprEnding, 0, sizeof(isExprEnding)); |
| isExprEnding[TOK_COMMA] = 1; |
| isExprEnding[TOK_SEMI] = 1; |
| isExprEnding[TOK_COLON] = 1; |
| isExprEnding[TOK_RP] = 1; |
| isExprEnding[TOK_RB] = 1; |
| isExprEnding[TOK_RC] = 1; |
| } |
| |
| #ifdef _MSC_VER |
| #pragma warning(pop) |
| #endif |
| |
| bool |
| TokenStream::checkOptions() |
| { |
| // Constrain starting columns to half of the range of a signed 32-bit value, |
| // to avoid overflow. |
| if (options().column >= mozilla::MaxValue<int32_t>::value / 2 + 1) { |
| reportErrorNoOffset(JSMSG_BAD_COLUMN_NUMBER); |
| return false; |
| } |
| |
| return true; |
| } |
| |
| TokenStream::~TokenStream() |
| { |
| } |
| |
// fast_getc: the cheapest getc variant the platform offers, falling back to
// plain getc when no unlocked variant was detected.
#ifdef HAVE_GETC_UNLOCKED
#  define fast_getc getc_unlocked
#elif defined(HAVE__GETC_NOLOCK)
#  define fast_getc _getc_nolock
#else
#  define fast_getc getc
#endif
| |
| MOZ_ALWAYS_INLINE void |
| TokenStream::updateLineInfoForEOL() |
| { |
| prevLinebase = linebase; |
| linebase = userbuf.offset(); |
| lineno++; |
| if (!srcCoords.add(lineno, linebase)) |
| flags.hitOOM = true; |
| } |
| |
| MOZ_ALWAYS_INLINE void |
| TokenStream::updateFlagsForEOL() |
| { |
| flags.isDirtyLine = false; |
| } |
| |
| // This gets the next char, normalizing all EOL sequences to '\n' as it goes. |
| int32_t |
| TokenStream::getChar() |
| { |
| int32_t c; |
| if (MOZ_LIKELY(userbuf.hasRawChars())) { |
| c = userbuf.getRawChar(); |
| |
| // Normalize the char16_t if it was a newline. |
| if (MOZ_UNLIKELY(c == '\n')) |
| goto eol; |
| if (MOZ_UNLIKELY(c == '\r')) { |
| // If it's a \r\n sequence: treat as a single EOL, skip over the \n. |
| if (MOZ_LIKELY(userbuf.hasRawChars())) |
| userbuf.matchRawChar('\n'); |
| goto eol; |
| } |
| if (MOZ_UNLIKELY(c == LINE_SEPARATOR || c == PARA_SEPARATOR)) |
| goto eol; |
| |
| return c; |
| } |
| |
| flags.isEOF = true; |
| return EOF; |
| |
| eol: |
| updateLineInfoForEOL(); |
| return '\n'; |
| } |
| |
| // This gets the next char. It does nothing special with EOL sequences, not |
| // even updating the line counters. It can be used safely if (a) the |
| // resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if |
| // it's an EOL, and (b) the line-related state (lineno, linebase) is not used |
| // before it's ungotten. |
| int32_t |
| TokenStream::getCharIgnoreEOL() |
| { |
| if (MOZ_LIKELY(userbuf.hasRawChars())) |
| return userbuf.getRawChar(); |
| |
| flags.isEOF = true; |
| return EOF; |
| } |
| |
| void |
| TokenStream::ungetChar(int32_t c) |
| { |
| if (c == EOF) |
| return; |
| MOZ_ASSERT(!userbuf.atStart()); |
| userbuf.ungetRawChar(); |
| if (c == '\n') { |
| #ifdef DEBUG |
| int32_t c2 = userbuf.peekRawChar(); |
| MOZ_ASSERT(TokenBuf::isRawEOLChar(c2)); |
| #endif |
| |
| // If it's a \r\n sequence, also unget the \r. |
| if (!userbuf.atStart()) |
| userbuf.matchRawCharBackwards('\r'); |
| |
| MOZ_ASSERT(prevLinebase != size_t(-1)); // we should never get more than one EOL char |
| linebase = prevLinebase; |
| prevLinebase = size_t(-1); |
| lineno--; |
| } else { |
| MOZ_ASSERT(userbuf.peekRawChar() == c); |
| } |
| } |
| |
| void |
| TokenStream::ungetCharIgnoreEOL(int32_t c) |
| { |
| if (c == EOF) |
| return; |
| MOZ_ASSERT(!userbuf.atStart()); |
| userbuf.ungetRawChar(); |
| } |
| |
| // Return true iff |n| raw characters can be read from this without reading past |
| // EOF or a newline, and copy those characters into |cp| if so. The characters |
| // are not consumed: use skipChars(n) to do so after checking that the consumed |
| // characters had appropriate values. |
| bool |
| TokenStream::peekChars(int n, char16_t* cp) |
| { |
| int i, j; |
| int32_t c; |
| |
| for (i = 0; i < n; i++) { |
| c = getCharIgnoreEOL(); |
| if (c == EOF) |
| break; |
| if (c == '\n') { |
| ungetCharIgnoreEOL(c); |
| break; |
| } |
| cp[i] = char16_t(c); |
| } |
| for (j = i - 1; j >= 0; j--) |
| ungetCharIgnoreEOL(cp[j]); |
| return i == n; |
| } |
| |
| size_t |
| TokenStream::TokenBuf::findEOLMax(size_t start, size_t max) |
| { |
| const char16_t* p = rawCharPtrAt(start); |
| |
| size_t n = 0; |
| while (true) { |
| if (p >= limit_) |
| break; |
| if (n >= max) |
| break; |
| n++; |
| if (TokenBuf::isRawEOLChar(*p++)) |
| break; |
| } |
| return start + n; |
| } |
| |
| bool |
| TokenStream::advance(size_t position) |
| { |
| const char16_t* end = userbuf.rawCharPtrAt(position); |
| while (userbuf.addressOfNextRawChar() < end) |
| getChar(); |
| |
| Token* cur = &tokens[cursor]; |
| cur->pos.begin = userbuf.offset(); |
| MOZ_MAKE_MEM_UNDEFINED(&cur->type, sizeof(cur->type)); |
| lookahead = 0; |
| |
| if (flags.hitOOM) |
| return reportError(JSMSG_OUT_OF_MEMORY); |
| |
| return true; |
| } |
| |
| void |
| TokenStream::tell(Position* pos) |
| { |
| pos->buf = userbuf.addressOfNextRawChar(/* allowPoisoned = */ true); |
| pos->flags = flags; |
| pos->lineno = lineno; |
| pos->linebase = linebase; |
| pos->prevLinebase = prevLinebase; |
| pos->lookahead = lookahead; |
| pos->currentToken = currentToken(); |
| for (unsigned i = 0; i < lookahead; i++) |
| pos->lookaheadTokens[i] = tokens[(cursor + 1 + i) & ntokensMask]; |
| } |
| |
| void |
| TokenStream::seek(const Position& pos) |
| { |
| userbuf.setAddressOfNextRawChar(pos.buf, /* allowPoisoned = */ true); |
| flags = pos.flags; |
| lineno = pos.lineno; |
| linebase = pos.linebase; |
| prevLinebase = pos.prevLinebase; |
| lookahead = pos.lookahead; |
| |
| tokens[cursor] = pos.currentToken; |
| for (unsigned i = 0; i < lookahead; i++) |
| tokens[(cursor + 1 + i) & ntokensMask] = pos.lookaheadTokens[i]; |
| } |
| |
| bool |
| TokenStream::seek(const Position& pos, const TokenStream& other) |
| { |
| if (!srcCoords.fill(other.srcCoords)) |
| return false; |
| seek(pos); |
| return true; |
| } |
| |
| bool |
| TokenStream::reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber, |
| va_list args) |
| { |
| // In strict mode code, this is an error, not merely a warning. |
| unsigned flags; |
| if (strictMode) |
| flags = JSREPORT_ERROR; |
| else if (options().extraWarningsOption) |
| flags = JSREPORT_WARNING | JSREPORT_STRICT; |
| else |
| return true; |
| |
| return reportCompileErrorNumberVA(offset, flags, errorNumber, args); |
| } |
| |
| void |
| CompileError::throwError(JSContext* cx) |
| { |
| // If there's a runtime exception type associated with this error |
| // number, set that as the pending exception. For errors occuring at |
| // compile time, this is very likely to be a JSEXN_SYNTAXERR. |
| // |
| // If an exception is thrown but not caught, the JSREPORT_EXCEPTION |
| // flag will be set in report.flags. Proper behavior for an error |
| // reporter is to ignore a report with this flag for all but top-level |
| // compilation errors. The exception will remain pending, and so long |
| // as the non-top-level "load", "eval", or "compile" native function |
| // returns false, the top-level reporter will eventually receive the |
| // uncaught exception report. |
| if (!ErrorToException(cx, message, &report, nullptr, nullptr)) |
| CallErrorReporter(cx, message, &report); |
| } |
| |
| CompileError::~CompileError() |
| { |
| js_free((void*)report.linebuf()); |
| js_free((void*)report.ucmessage); |
| js_free(message); |
| message = nullptr; |
| |
| if (report.messageArgs) { |
| if (argumentsType == ArgumentsAreASCII) { |
| unsigned i = 0; |
| while (report.messageArgs[i]) |
| js_free((void*)report.messageArgs[i++]); |
| } |
| js_free(report.messageArgs); |
| } |
| |
| PodZero(&report); |
| } |
| |
| bool |
| TokenStream::reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber, |
| va_list args) |
| { |
| bool warning = JSREPORT_IS_WARNING(flags); |
| |
| if (warning && options().werrorOption) { |
| flags &= ~JSREPORT_WARNING; |
| warning = false; |
| } |
| |
| // On the main thread, report the error immediately. When compiling off |
| // thread, save the error so that the main thread can report it later. |
| CompileError tempErr; |
| CompileError& err = cx->isJSContext() ? tempErr : cx->addPendingCompileError(); |
| |
| err.report.flags = flags; |
| err.report.errorNumber = errorNumber; |
| err.report.filename = filename; |
| err.report.isMuted = mutedErrors; |
| if (offset == NoOffset) { |
| err.report.lineno = 0; |
| err.report.column = 0; |
| } else { |
| err.report.lineno = srcCoords.lineNum(offset); |
| err.report.column = srcCoords.columnIndex(offset); |
| } |
| |
| // If we have no location information, try to get one from the caller. |
| bool callerFilename = false; |
| if (offset != NoOffset && !err.report.filename && cx->isJSContext()) { |
| NonBuiltinFrameIter iter(cx->asJSContext(), |
| FrameIter::ALL_CONTEXTS, FrameIter::GO_THROUGH_SAVED, |
| FrameIter::FOLLOW_DEBUGGER_EVAL_PREV_LINK, |
| cx->compartment()->principals()); |
| if (!iter.done() && iter.scriptFilename()) { |
| callerFilename = true; |
| err.report.filename = iter.scriptFilename(); |
| err.report.lineno = iter.computeLine(&err.report.column); |
| } |
| } |
| |
| err.argumentsType = (flags & JSREPORT_UC) ? ArgumentsAreUnicode : ArgumentsAreASCII; |
| |
| if (!ExpandErrorArgumentsVA(cx, GetErrorMessage, nullptr, errorNumber, &err.message, |
| &err.report, err.argumentsType, args)) |
| { |
| return false; |
| } |
| |
| // Given a token, T, that we want to complain about: if T's (starting) |
| // lineno doesn't match TokenStream's lineno, that means we've scanned past |
| // the line that T starts on, which makes it hard to print some or all of |
| // T's (starting) line for context. |
| // |
| // So we don't even try, leaving report.linebuf and friends zeroed. This |
| // means that any error involving a multi-line token (e.g. an unterminated |
| // multi-line string literal) won't have a context printed. |
| if (offset != NoOffset && err.report.lineno == lineno && !callerFilename) { |
| // We show only a portion (a "window") of the line around the erroneous |
| // token -- the first char in the token, plus |windowRadius| chars |
| // before it and |windowRadius - 1| chars after it. This is because |
| // lines can be very long and printing the whole line is (a) not that |
| // helpful, and (b) can waste a lot of memory. See bug 634444. |
| static const size_t windowRadius = 60; |
| |
| // The window must start within the current line, no earlier than |
| // windowRadius characters before offset. |
| size_t windowStart = (offset - linebase > windowRadius) ? |
| offset - windowRadius : |
| linebase; |
| |
| // The window must start within the portion of the current line |
| // that we actually have in our buffer. |
| if (windowStart < userbuf.startOffset()) |
| windowStart = userbuf.startOffset(); |
| |
| // The window must end within the current line, no later than |
| // windowRadius after offset. |
| size_t windowEnd = userbuf.findEOLMax(offset, windowRadius); |
| size_t windowLength = windowEnd - windowStart; |
| MOZ_ASSERT(windowLength <= windowRadius * 2); |
| |
| // Create the windowed strings. |
| StringBuffer windowBuf(cx); |
| if (!windowBuf.append(userbuf.rawCharPtrAt(windowStart), windowLength) || |
| !windowBuf.append('\0')) |
| { |
| return false; |
| } |
| |
| // The window into the offending source line, without final \n. |
| mozilla::UniquePtr<char16_t[], JS::FreePolicy> linebuf(windowBuf.stealChars()); |
| if (!linebuf) |
| return false; |
| |
| err.report.initLinebuf(linebuf.release(), windowLength, offset - windowStart); |
| } |
| |
| if (cx->isJSContext()) |
| err.throwError(cx->asJSContext()); |
| |
| return warning; |
| } |
| |
| bool |
| TokenStream::reportStrictModeError(unsigned errorNumber, ...) |
| { |
| va_list args; |
| va_start(args, errorNumber); |
| bool result = reportStrictModeErrorNumberVA(currentToken().pos.begin, strictMode(), |
| errorNumber, args); |
| va_end(args); |
| return result; |
| } |
| |
| bool |
| TokenStream::reportError(unsigned errorNumber, ...) |
| { |
| va_list args; |
| va_start(args, errorNumber); |
| bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_ERROR, errorNumber, |
| args); |
| va_end(args); |
| return result; |
| } |
| |
| bool |
| TokenStream::reportErrorNoOffset(unsigned errorNumber, ...) |
| { |
| va_list args; |
| va_start(args, errorNumber); |
| bool result = reportCompileErrorNumberVA(NoOffset, JSREPORT_ERROR, errorNumber, |
| args); |
| va_end(args); |
| return result; |
| } |
| |
| bool |
| TokenStream::reportWarning(unsigned errorNumber, ...) |
| { |
| va_list args; |
| va_start(args, errorNumber); |
| bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_WARNING, |
| errorNumber, args); |
| va_end(args); |
| return result; |
| } |
| |
| bool |
| TokenStream::reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, va_list args) |
| { |
| if (!options().extraWarningsOption) |
| return true; |
| |
| return reportCompileErrorNumberVA(offset, JSREPORT_STRICT|JSREPORT_WARNING, errorNumber, args); |
| } |
| |
| void |
| TokenStream::reportAsmJSError(uint32_t offset, unsigned errorNumber, ...) |
| { |
| va_list args; |
| va_start(args, errorNumber); |
| unsigned flags = options().throwOnAsmJSValidationFailureOption |
| ? JSREPORT_ERROR |
| : JSREPORT_WARNING; |
| reportCompileErrorNumberVA(offset, flags, errorNumber, args); |
| va_end(args); |
| } |
| |
| // We have encountered a '\': check for a Unicode escape sequence after it. |
| // Return 'true' and the character code value (by value) if we found a |
| // Unicode escape sequence. Otherwise, return 'false'. In both cases, do not |
| // advance along the buffer. |
| bool |
| TokenStream::peekUnicodeEscape(int* result) |
| { |
| char16_t cp[5]; |
| |
| if (peekChars(5, cp) && cp[0] == 'u' && |
| JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) && |
| JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4])) |
| { |
| *result = (((((JS7_UNHEX(cp[1]) << 4) |
| + JS7_UNHEX(cp[2])) << 4) |
| + JS7_UNHEX(cp[3])) << 4) |
| + JS7_UNHEX(cp[4]); |
| return true; |
| } |
| return false; |
| } |
| |
| bool |
| TokenStream::matchUnicodeEscapeIdStart(int32_t* cp) |
| { |
| if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) { |
| skipChars(5); |
| return true; |
| } |
| return false; |
| } |
| |
| bool |
| TokenStream::matchUnicodeEscapeIdent(int32_t* cp) |
| { |
| if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) { |
| skipChars(5); |
| return true; |
| } |
| return false; |
| } |
| |
// Returns true iff |p| begins with the characters of the null-terminated
// string |q|, i.e. the first strlen(q) characters of |p| equal those of |q|.
// An empty |q| always matches.
static bool
CharsMatch(const char16_t* p, const char* q) {
    for (; *q != '\0'; ++p, ++q) {
        if (*p != *q)
            return false;
    }
    return true;
}
| |
| bool |
| TokenStream::getDirectives(bool isMultiline, bool shouldWarnDeprecated) |
| { |
| // Match directive comments used in debugging, such as "//# sourceURL" and |
| // "//# sourceMappingURL". Use of "//@" instead of "//#" is deprecated. |
| // |
| // To avoid a crashing bug in IE, several JavaScript transpilers wrap single |
| // line comments containing a source mapping URL inside a multiline |
| // comment. To avoid potentially expensive lookahead and backtracking, we |
| // only check for this case if we encounter a '#' character. |
| |
| if (!getDisplayURL(isMultiline, shouldWarnDeprecated)) |
| return false; |
| if (!getSourceMappingURL(isMultiline, shouldWarnDeprecated)) |
| return false; |
| |
| return true; |
| } |
| |
| bool |
| TokenStream::getDirective(bool isMultiline, bool shouldWarnDeprecated, |
| const char* directive, int directiveLength, |
| const char* errorMsgPragma, |
| UniquePtr<char16_t[], JS::FreePolicy>* destination) |
| { |
| MOZ_ASSERT(directiveLength <= 18); |
| char16_t peeked[18]; |
| int32_t c; |
| |
| if (peekChars(directiveLength, peeked) && CharsMatch(peeked, directive)) { |
| if (shouldWarnDeprecated && |
| !reportWarning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma)) |
| return false; |
| |
| skipChars(directiveLength); |
| tokenbuf.clear(); |
| |
| while ((c = peekChar()) && c != EOF && !IsSpaceOrBOM2(c)) { |
| getChar(); |
| // Debugging directives can occur in both single- and multi-line |
| // comments. If we're currently inside a multi-line comment, we also |
| // need to recognize multi-line comment terminators. |
| if (isMultiline && c == '*' && peekChar() == '/') { |
| ungetChar('*'); |
| break; |
| } |
| if (!tokenbuf.append(c)) |
| return false; |
| } |
| |
| if (tokenbuf.empty()) { |
| // The directive's URL was missing, but this is not quite an |
| // exception that we should stop and drop everything for. |
| return true; |
| } |
| |
| size_t length = tokenbuf.length(); |
| |
| *destination = cx->make_pod_array<char16_t>(length + 1); |
| if (!*destination) |
| return false; |
| |
| PodCopy(destination->get(), tokenbuf.begin(), length); |
| (*destination)[length] = '\0'; |
| } |
| |
| return true; |
| } |
| |
| bool |
| TokenStream::getDisplayURL(bool isMultiline, bool shouldWarnDeprecated) |
| { |
| // Match comments of the form "//# sourceURL=<url>" or |
| // "/\* //# sourceURL=<url> *\/" |
| // |
| // Note that while these are labeled "sourceURL" in the source text, |
| // internally we refer to it as a "displayURL" to distinguish what the |
| // developer would like to refer to the source as from the source's actual |
| // URL. |
| |
| return getDirective(isMultiline, shouldWarnDeprecated, " sourceURL=", 11, |
| "sourceURL", &displayURL_); |
| } |
| |
| bool |
| TokenStream::getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated) |
| { |
| // Match comments of the form "//# sourceMappingURL=<url>" or |
| // "/\* //# sourceMappingURL=<url> *\/" |
| |
| return getDirective(isMultiline, shouldWarnDeprecated, " sourceMappingURL=", 18, |
| "sourceMappingURL", &sourceMapURL_); |
| } |
| |
| MOZ_ALWAYS_INLINE Token* |
| TokenStream::newToken(ptrdiff_t adjust) |
| { |
| cursor = (cursor + 1) & ntokensMask; |
| Token* tp = &tokens[cursor]; |
| tp->pos.begin = userbuf.offset() + adjust; |
| |
| // NOTE: tp->pos.end is not set until the very end of getTokenInternal(). |
| MOZ_MAKE_MEM_UNDEFINED(&tp->pos.end, sizeof(tp->pos.end)); |
| |
| return tp; |
| } |
| |
| MOZ_ALWAYS_INLINE JSAtom* |
| TokenStream::atomize(ExclusiveContext* cx, CharBuffer& cb) |
| { |
| return AtomizeChars(cx, cb.begin(), cb.length()); |
| } |
| |
#ifdef DEBUG
// Debug-only sanity check: the token's kind must be a real token kind and its
// end position must not precede its beginning.
static bool
IsTokenSane(Token* tp)
{
    // Nb: TOK_EOL should never be used in an actual Token; it should only be
    // returned as a TokenKind from peekTokenSameLine().
    const bool kindIsValid = tp->type >= 0 && tp->type < TOK_LIMIT && tp->type != TOK_EOL;

    return kindIsValid && tp->pos.end >= tp->pos.begin;
}
#endif
| |
| bool |
| TokenStream::putIdentInTokenbuf(const char16_t* identStart) |
| { |
| int32_t c, qc; |
| const char16_t* tmp = userbuf.addressOfNextRawChar(); |
| userbuf.setAddressOfNextRawChar(identStart); |
| |
| tokenbuf.clear(); |
| for (;;) { |
| c = getCharIgnoreEOL(); |
| if (!IsIdentifierPart(c)) { |
| if (c != '\\' || !matchUnicodeEscapeIdent(&qc)) |
| break; |
| c = qc; |
| } |
| if (!tokenbuf.append(c)) { |
| userbuf.setAddressOfNextRawChar(tmp); |
| return false; |
| } |
| } |
| userbuf.setAddressOfNextRawChar(tmp); |
| return true; |
| } |
| |
| bool |
| TokenStream::checkForKeyword(const KeywordInfo* kw, TokenKind* ttp) |
| { |
| if (kw->tokentype == TOK_RESERVED) |
| return reportError(JSMSG_RESERVED_ID, kw->chars); |
| |
| if (kw->tokentype == TOK_STRICT_RESERVED) |
| return reportStrictModeError(JSMSG_RESERVED_ID, kw->chars); |
| |
| // Treat 'let' as an identifier and contextually a keyword in sloppy mode. |
| // It is always a keyword in strict mode. |
| if (kw->tokentype == TOK_LET && !strictMode()) |
| return true; |
| |
| // Working keyword. |
| if (ttp) { |
| *ttp = kw->tokentype; |
| return true; |
| } |
| |
| return reportError(JSMSG_RESERVED_ID, kw->chars); |
| } |
| |
| bool |
| TokenStream::checkForKeyword(JSAtom* atom, TokenKind* ttp) |
| { |
| const KeywordInfo* kw = FindKeyword(atom); |
| if (!kw) |
| return true; |
| |
| return checkForKeyword(kw, ttp); |
| } |
| |
| enum FirstCharKind { |
| // A char16_t has the 'OneChar' kind if it, by itself, constitutes a valid |
| // token that cannot also be a prefix of a longer token. E.g. ';' has the |
| // OneChar kind, but '+' does not, because '++' and '+=' are valid longer tokens |
| // that begin with '+'. |
| // |
| // The few token kinds satisfying these properties cover roughly 35--45% |
| // of the tokens seen in practice. |
| // |
| // We represent the 'OneChar' kind with any positive value less than |
| // TOK_LIMIT. This representation lets us associate each one-char token |
| // char16_t with a TokenKind and thus avoid a subsequent char16_t-to-TokenKind |
| // conversion. |
| OneChar_Min = 0, |
| OneChar_Max = TOK_LIMIT - 1, |
| |
| Space = TOK_LIMIT, |
| Ident, |
| Dec, |
| String, |
| EOL, |
| BasePrefix, |
| Other, |
| |
| LastCharKind = Other |
| }; |
| |
| // Kind of each code point below 128, i.e. the contents of firstCharKinds[]: |
| // OneChar: 40, 41, 44, 58, 59, 63, 91, 93, 123, 125, 126: |
| // '(', ')', ',', ':', ';', '?', '[', ']', '{', '}', '~' |
| // Ident: 36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z' |
| // String: 34, 39, 96: '"', '\'', '`' (96 is written as Templat in the table) |
| // Dec: 49..57: '1'..'9' |
| // BasePrefix: 48: '0' |
| // Space: 9, 11, 12, 32: '\t', '\v', '\f', ' ' |
| // EOL: 10, 13: '\n', '\r' |
| // Other: every remaining code point below 128; this includes '.', '=' and |
| // '+', which have no dedicated kind in FirstCharKind. |
| // |
| #define T_COMMA TOK_COMMA |
| #define T_COLON TOK_COLON |
| #define T_BITNOT TOK_BITNOT |
| #define Templat String |
| #define _______ Other |
| static const uint8_t firstCharKinds[] = { |
| /* 0 1 2 3 4 5 6 7 8 9 */ |
| /* 0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, Space, |
| /* 10+ */ EOL, Space, Space, EOL, _______, _______, _______, _______, _______, _______, |
| /* 20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______, |
| /* 30+ */ _______, _______, Space, _______, String, _______, Ident, _______, _______, String, |
| /* 40+ */ TOK_LP, TOK_RP, _______, _______, T_COMMA,_______, _______, _______,BasePrefix, Dec, |
| /* 50+ */ Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, T_COLON,TOK_SEMI, |
| /* 60+ */ _______, _______, _______,TOK_HOOK, _______, Ident, Ident, Ident, Ident, Ident, |
| /* 70+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, |
| /* 80+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, |
| /* 90+ */ Ident, TOK_LB, _______, TOK_RB, _______, Ident, Templat, Ident, Ident, Ident, |
| /* 100+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, |
| /* 110+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, |
| /* 120+ */ Ident, Ident, Ident, TOK_LC, _______, TOK_RC,T_BITNOT, _______ |
| }; |
| #undef T_COMMA |
| #undef T_COLON |
| #undef T_BITNOT |
| #undef Templat |
| #undef _______ |
| |
| static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)), |
| "Elements of firstCharKinds[] are too small"); |
| |
// Scan the next token out of |userbuf|.
//
// On success the token's kind is stored in |*ttp| and true is returned.  On a
// lexical error the error is reported, |flags.hadError| is set, |*ttp| is
// marked undefined and false is returned.  If |flags.hitOOM| is set when the
// scan finishes, the result of reporting JSMSG_OUT_OF_MEMORY is returned
// instead.
//
// |modifier| selects context-dependent scanning: TemplateTail resumes a
// template string, KeywordIsName suppresses keyword recognition, and Operand
// allows a '/' to begin a regexp literal.
bool
TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
{
    int c, qc;
    Token* tp;
    FirstCharKind c1kind;
    const char16_t* numStart;
    bool hasExp;
    DecimalPoint decimalPoint;
    const char16_t* identStart;
    bool hadUnicodeEscape;

    // Check if in the middle of a template string. Have to get this out of
    // the way first.
    if (MOZ_UNLIKELY(modifier == TemplateTail)) {
        if (!getStringOrTemplateToken('`', &tp))
            goto error;
        goto out;
    }

  retry:
    // Whitespace, line terminators and comments all jump back here to start
    // scanning afresh.
    if (MOZ_UNLIKELY(!userbuf.hasRawChars())) {
        tp = newToken(0);
        tp->type = TOK_EOF;
        flags.isEOF = true;
        goto out;
    }

    c = userbuf.getRawChar();
    MOZ_ASSERT(c != EOF);

    // Chars not in the range 0..127 are rare.  Getting them out of the way
    // early allows subsequent checking to be faster.
    if (MOZ_UNLIKELY(c >= 128)) {
        if (IsSpaceOrBOM2(c)) {
            if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
                updateLineInfoForEOL();
                updateFlagsForEOL();
            }

            goto retry;
        }

        tp = newToken(-1);

        static_assert('$' < 128,
                      "IdentifierStart contains '$', but as !IsLetter('$'), "
                      "ensure that '$' is never handled here");
        static_assert('_' < 128,
                      "IdentifierStart contains '_', but as !IsLetter('_'), "
                      "ensure that '_' is never handled here");
        if (IsLetter(c)) {
            // The first char has already been consumed, so back up by one.
            identStart = userbuf.addressOfNextRawChar() - 1;
            hadUnicodeEscape = false;
            goto identifier;
        }

        goto badchar;
    }

    // Get the token kind, based on the first char.  The ordering of c1kind
    // comparison is based on the frequency of tokens in real code -- Parsemark
    // (which represents typical JS code on the web) and the Unreal demo (which
    // represents asm.js code).
    //
    //                  Parsemark   Unreal
    //  OneChar         32.9%       39.7%
    //  Space           25.0%        0.6%
    //  Ident           19.2%       36.4%
    //  Dec              7.2%        5.1%
    //  String           7.9%        0.0%
    //  EOL              1.7%        0.0%
    //  BasePrefix       0.4%        4.9%
    //  Other            5.7%       13.3%
    //
    // The ordering is based mostly on Parsemark frequencies, with Unreal
    // frequencies used to break close categories (e.g. |Dec| and |String|).
    // |Other| is biggish, but no other token kind is common enough for it to
    // be worth adding extra values to FirstCharKind.
    //
    c1kind = FirstCharKind(firstCharKinds[c]);

    // Look for an unambiguous single-char token.
    //
    if (c1kind <= OneChar_Max) {
        tp = newToken(-1);
        // For OneChar kinds the table entry is the TokenKind itself.
        tp->type = TokenKind(c1kind);
        goto out;
    }

    // Skip over non-EOL whitespace chars.
    //
    if (c1kind == Space)
        goto retry;

    // Look for an identifier.
    //
    if (c1kind == Ident) {
        tp = newToken(-1);
        identStart = userbuf.addressOfNextRawChar() - 1;
        hadUnicodeEscape = false;

      identifier:
        // Also reached from the non-ASCII letter path above and from the
        // '\\' case below; both set |tp|, |identStart| and |hadUnicodeEscape|
        // before jumping here.
        for (;;) {
            c = getCharIgnoreEOL();
            if (c == EOF)
                break;
            if (!IsIdentifierPart(c)) {
                if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
                    break;
                hadUnicodeEscape = true;
            }
        }
        // Put back the char that terminated the identifier.
        ungetCharIgnoreEOL(c);

        // Identifiers containing no Unicode escapes can be processed directly
        // from userbuf.  The rest must use the escapes converted via tokenbuf
        // before atomizing.
        const char16_t* chars;
        size_t length;
        if (hadUnicodeEscape) {
            if (!putIdentInTokenbuf(identStart))
                goto error;

            chars = tokenbuf.begin();
            length = tokenbuf.length();
        } else {
            chars = identStart;
            length = userbuf.addressOfNextRawChar() - identStart;
        }

        // Represent keywords as keyword tokens unless told otherwise.
        if (modifier != KeywordIsName) {
            if (const KeywordInfo* kw = FindKeyword(chars, length)) {
                // That said, keywords can't contain escapes.  (Contexts where
                // keywords are treated as names, that also sometimes treat
                // keywords as keywords, must manually check this requirement.)
                if (hadUnicodeEscape) {
                    reportError(JSMSG_ESCAPED_KEYWORD);
                    goto error;
                }

                // checkForKeyword() may leave the type as TOK_NAME, in which
                // case we fall through and atomize the chars as a name.
                tp->type = TOK_NAME;
                if (!checkForKeyword(kw, &tp->type))
                    goto error;
                if (tp->type != TOK_NAME)
                    goto out;
            }
        }

        JSAtom* atom = AtomizeChars(cx, chars, length);
        if (!atom)
            goto error;
        tp->type = TOK_NAME;
        tp->setName(atom->asPropertyName());
        goto out;
    }

    // Look for a decimal number.
    //
    if (c1kind == Dec) {
        tp = newToken(-1);
        numStart = userbuf.addressOfNextRawChar() - 1;

      decimal:
        // Also reached from the BasePrefix path below, with |tp|, |c| and
        // |numStart| already set up.
        decimalPoint = NoDecimal;
        hasExp = false;
        while (JS7_ISDEC(c))
            c = getCharIgnoreEOL();

        if (c == '.') {
            decimalPoint = HasDecimal;
          decimal_dot:
            // Also reached from the '.' case below for numbers such as ".5".
            do {
                c = getCharIgnoreEOL();
            } while (JS7_ISDEC(c));
        }
        if (c == 'e' || c == 'E') {
            hasExp = true;
            c = getCharIgnoreEOL();
            if (c == '+' || c == '-')
                c = getCharIgnoreEOL();
            if (!JS7_ISDEC(c)) {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_EXPONENT);
                goto error;
            }
            do {
                c = getCharIgnoreEOL();
            } while (JS7_ISDEC(c));
        }
        ungetCharIgnoreEOL(c);

        // A number may not be immediately followed by an identifier.
        if (c != EOF && IsIdentifierStart(c)) {
            reportError(JSMSG_IDSTART_AFTER_NUMBER);
            goto error;
        }

        // Unlike identifiers and strings, numbers cannot contain escaped
        // chars, so we don't need to use tokenbuf.  Instead we can just
        // convert the char16_t characters in userbuf to the numeric value.
        double dval;
        if (!((decimalPoint == HasDecimal) || hasExp)) {
            if (!GetDecimalInteger(cx, numStart, userbuf.addressOfNextRawChar(), &dval))
                goto error;
        } else {
            const char16_t* dummy;
            if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))
                goto error;
        }
        tp->type = TOK_NUMBER;
        tp->setNumber(dval, decimalPoint);
        goto out;
    }

    // Look for a string or a template string.
    //
    if (c1kind == String) {
        if (!getStringOrTemplateToken(c, &tp))
            goto error;
        goto out;
    }

    // Skip over EOL chars, updating line state along the way.
    //
    if (c1kind == EOL) {
        // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
        if (c == '\r' && userbuf.hasRawChars())
            userbuf.matchRawChar('\n');
        updateLineInfoForEOL();
        updateFlagsForEOL();
        goto retry;
    }

    // Look for a hexadecimal, octal, or binary number.
    //
    if (c1kind == BasePrefix) {
        tp = newToken(-1);
        int radix;
        c = getCharIgnoreEOL();
        if (c == 'x' || c == 'X') {
            radix = 16;
            c = getCharIgnoreEOL();
            if (!JS7_ISHEX(c)) {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_HEXDIGITS);
                goto error;
            }
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0x'
            while (JS7_ISHEX(c))
                c = getCharIgnoreEOL();
        } else if (c == 'b' || c == 'B') {
            radix = 2;
            c = getCharIgnoreEOL();
            if (c != '0' && c != '1') {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_BINARY_DIGITS);
                goto error;
            }
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0b'
            while (c == '0' || c == '1')
                c = getCharIgnoreEOL();
        } else if (c == 'o' || c == 'O') {
            radix = 8;
            c = getCharIgnoreEOL();
            if (c < '0' || c > '7') {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_OCTAL_DIGITS);
                goto error;
            }
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0o'
            while ('0' <= c && c <= '7')
                c = getCharIgnoreEOL();
        } else if (JS7_ISDEC(c)) {
            // Legacy octal literal: a '0' followed directly by digits.
            radix = 8;
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0'
            while (JS7_ISDEC(c)) {
                // Octal integer literals are not permitted in strict mode code.
                if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
                    goto error;

                // Outside strict mode, we permit 08 and 09 as decimal numbers,
                // which makes our behaviour a superset of the ECMA numeric
                // grammar. We might not always be so permissive, so we warn
                // about it.
                if (c >= '8') {
                    if (!reportWarning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
                        goto error;
                    }
                    goto decimal;   // use the decimal scanner for the rest of the number
                }
                c = getCharIgnoreEOL();
            }
        } else {
            // '0' not followed by a base prefix ('x', 'b' or 'o', in either
            // case) or a digit; scan as a decimal number.
            numStart = userbuf.addressOfNextRawChar() - 1;
            goto decimal;
        }
        ungetCharIgnoreEOL(c);

        if (c != EOF && IsIdentifierStart(c)) {
            reportError(JSMSG_IDSTART_AFTER_NUMBER);
            goto error;
        }

        double dval;
        const char16_t* dummy;
        if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
            goto error;
        tp->type = TOK_NUMBER;
        tp->setNumber(dval, NoDecimal);
        goto out;
    }

    // This handles everything else.
    //
    MOZ_ASSERT(c1kind == Other);
    tp = newToken(-1);
    switch (c) {
      case '.':
        c = getCharIgnoreEOL();
        if (JS7_ISDEC(c)) {
            // A number with no integer part.  Both the '.' and the first
            // digit have been consumed, so back up by two.
            numStart = userbuf.addressOfNextRawChar() - 2;
            decimalPoint = HasDecimal;
            hasExp = false;
            goto decimal_dot;
        }
        if (c == '.') {
            if (matchChar('.')) {
                tp->type = TOK_TRIPLEDOT;
                goto out;
            }
        }
        ungetCharIgnoreEOL(c);
        tp->type = TOK_DOT;
        goto out;

      case '=':
        if (matchChar('='))
            tp->type = matchChar('=') ? TOK_STRICTEQ : TOK_EQ;
        else if (matchChar('>'))
            tp->type = TOK_ARROW;
        else
            tp->type = TOK_ASSIGN;
        goto out;

      case '+':
        if (matchChar('+'))
            tp->type = TOK_INC;
        else
            tp->type = matchChar('=') ? TOK_ADDASSIGN : TOK_ADD;
        goto out;

      case '\\':
        // An identifier may begin with a Unicode escape.
        hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
        if (hadUnicodeEscape) {
            // NOTE(review): the 6 presumably covers the '\\', the 'u' and
            // four hex digits consumed so far -- confirm against
            // matchUnicodeEscapeIdStart().
            identStart = userbuf.addressOfNextRawChar() - 6;
            goto identifier;
        }
        goto badchar;

      case '|':
        if (matchChar('|'))
            tp->type = TOK_OR;
        else
            tp->type = matchChar('=') ? TOK_BITORASSIGN : TOK_BITOR;
        goto out;

      case '^':
        tp->type = matchChar('=') ? TOK_BITXORASSIGN : TOK_BITXOR;
        goto out;

      case '&':
        if (matchChar('&'))
            tp->type = TOK_AND;
        else
            tp->type = matchChar('=') ? TOK_BITANDASSIGN : TOK_BITAND;
        goto out;

      case '!':
        if (matchChar('='))
            tp->type = matchChar('=') ? TOK_STRICTNE : TOK_NE;
        else
            tp->type = TOK_NOT;
        goto out;

      case '<':
        // NB: treat HTML begin-comment as comment-till-end-of-line.
        if (matchChar('!')) {
            if (matchChar('-')) {
                if (matchChar('-'))
                    goto skipline;
                ungetChar('-');
            }
            ungetChar('!');
        }
        if (matchChar('<')) {
            tp->type = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;
        } else {
            tp->type = matchChar('=') ? TOK_LE : TOK_LT;
        }
        goto out;

      case '>':
        if (matchChar('>')) {
            if (matchChar('>'))
                tp->type = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;
            else
                tp->type = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;
        } else {
            tp->type = matchChar('=') ? TOK_GE : TOK_GT;
        }
        goto out;

      case '*':
#ifdef JS_HAS_EXPONENTIATION
        if (matchChar('*'))
            tp->type = matchChar('=') ? TOK_POWASSIGN : TOK_POW;
        else
#endif
            tp->type = matchChar('=') ? TOK_MULASSIGN : TOK_MUL;
        goto out;

      case '/':
        // Look for a single-line comment.
        if (matchChar('/')) {
            c = peekChar();
            if (c == '@' || c == '#') {
                // Only the '@' form of a directive comment asks for a warning.
                bool shouldWarn = getChar() == '@';
                if (!getDirectives(false, shouldWarn))
                    goto error;
            }

          skipline:
            // Also reached for HTML-style "<!--" and "-->" comments.  Skip
            // to the end of the line, leaving the '\n' to be scanned as EOL.
            while ((c = getChar()) != EOF && c != '\n')
                continue;
            ungetChar(c);
            // NOTE(review): presumably this releases the token slot taken by
            // newToken() above, since a comment produces no token -- confirm.
            cursor = (cursor - 1) & ntokensMask;
            goto retry;
        }

        // Look for a multi-line comment.
        if (matchChar('*')) {
            unsigned linenoBefore = lineno;
            while ((c = getChar()) != EOF &&
                   !(c == '*' && matchChar('/'))) {
                if (c == '@' || c == '#') {
                    bool shouldWarn = c == '@';
                    if (!getDirectives(true, shouldWarn))
                        goto error;
                }
            }
            if (c == EOF) {
                reportError(JSMSG_UNTERMINATED_COMMENT);
                goto error;
            }
            // A comment that spanned lines counts as an EOL for the flags.
            if (linenoBefore != lineno)
                updateFlagsForEOL();
            cursor = (cursor - 1) & ntokensMask;
            goto retry;
        }

        // Look for a regexp.
        if (modifier == Operand) {
            tokenbuf.clear();

            bool inCharClass = false;
            for (;;) {
                c = getChar();
                if (c == '\\') {
                    // Keep the backslash and take the next char verbatim.
                    if (!tokenbuf.append(c))
                        goto error;
                    c = getChar();
                } else if (c == '[') {
                    inCharClass = true;
                } else if (c == ']') {
                    inCharClass = false;
                } else if (c == '/' && !inCharClass) {
                    // For compat with IE, allow unescaped / in char classes.
                    break;
                }
                if (c == '\n' || c == EOF) {
                    ungetChar(c);
                    reportError(JSMSG_UNTERMINATED_REGEXP);
                    goto error;
                }
                if (!tokenbuf.append(c))
                    goto error;
            }

            // Scan the flags.  Each flag may appear at most once; a repeated
            // flag ends the loop and is rejected by the letter check below.
            RegExpFlag reflags = NoFlags;
            unsigned length = tokenbuf.length() + 1;
            while (true) {
                c = peekChar();
                if (c == 'g' && !(reflags & GlobalFlag))
                    reflags = RegExpFlag(reflags | GlobalFlag);
                else if (c == 'i' && !(reflags & IgnoreCaseFlag))
                    reflags = RegExpFlag(reflags | IgnoreCaseFlag);
                else if (c == 'm' && !(reflags & MultilineFlag))
                    reflags = RegExpFlag(reflags | MultilineFlag);
                else if (c == 'y' && !(reflags & StickyFlag))
                    reflags = RegExpFlag(reflags | StickyFlag);
                else
                    break;
                getChar();
                length++;
            }

            c = peekChar();
            if (JS7_ISLET(c)) {
                char buf[2] = { '\0', '\0' };
                // Move the token start forward so the error is reported at
                // the offending flag char.
                tp->pos.begin += length + 1;
                buf[0] = char(c);
                reportError(JSMSG_BAD_REGEXP_FLAG, buf);
                (void) getChar();
                goto error;
            }
            tp->type = TOK_REGEXP;
            tp->setRegExpFlags(reflags);
            goto out;
        }

        tp->type = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;
        goto out;

      case '%':
        tp->type = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;
        goto out;

      case '-':
        if (matchChar('-')) {
            // "-->" is a comment-till-end-of-line unless a token has already
            // been seen on this line (flags.isDirtyLine).
            if (peekChar() == '>' && !flags.isDirtyLine)
                goto skipline;
            tp->type = TOK_DEC;
        } else {
            tp->type = matchChar('=') ? TOK_SUBASSIGN : TOK_SUB;
        }
        goto out;

      badchar:
      default:
        reportError(JSMSG_ILLEGAL_CHARACTER);
        goto error;
    }

    MOZ_CRASH("should have jumped to |out| or |error|");

  out:
    // Common exit for a successfully scanned token.
    if (flags.hitOOM)
        return reportError(JSMSG_OUT_OF_MEMORY);

    flags.isDirtyLine = true;
    tp->pos.end = userbuf.offset();
#ifdef DEBUG
    // Save the modifier used to get this token, so that if an ungetToken()
    // occurs and then the token is re-gotten (or peeked, etc.), we can assert
    // that both gets have used the same modifiers.
    tp->modifier = modifier;
    tp->modifierException = NoException;
#endif
    MOZ_ASSERT(IsTokenSane(tp));
    *ttp = tp->type;
    return true;

  error:
    // Common exit for a token that could not be scanned.
    if (flags.hitOOM)
        return reportError(JSMSG_OUT_OF_MEMORY);

    flags.isDirtyLine = true;
    tp->pos.end = userbuf.offset();
    // The token has no meaningful type; mark it undefined for memory checkers.
    MOZ_MAKE_MEM_UNDEFINED(&tp->type, sizeof(tp->type));
    flags.hadError = true;
#ifdef DEBUG
    // Poisoning userbuf on error establishes an invariant:  once an erroneous
    // token has been seen, userbuf will not be consulted again.  This is true
    // because the parser will deal with the illegal token by aborting parsing
    // immediately.
    userbuf.poison();
#endif
    MOZ_MAKE_MEM_UNDEFINED(ttp, sizeof(*ttp));
    return false;
}
| |
| bool |
| TokenStream::getBracedUnicode(uint32_t* cp) |
| { |
| consumeKnownChar('{'); |
| |
| bool first = true; |
| int32_t c; |
| uint32_t code = 0; |
| while (true) { |
| c = getCharIgnoreEOL(); |
| if (c == EOF) |
| return false; |
| if (c == '}') { |
| if (first) |
| return false; |
| break; |
| } |
| |
| if (!JS7_ISHEX(c)) |
| return false; |
| |
| code = (code << 4) | JS7_UNHEX(c); |
| if (code > 0x10FFFF) |
| return false; |
| first = false; |
| } |
| |
| *cp = code; |
| return true; |
| } |
| |
| bool |
| TokenStream::getStringOrTemplateToken(int untilChar, Token** tp) |
| { |
| int c; |
| int nc = -1; |
| |
| bool parsingTemplate = (untilChar == '`'); |
| |
| *tp = newToken(-1); |
| tokenbuf.clear(); |
| |
| // We need to detect any of these chars: " or ', \n (or its |
| // equivalents), \\, EOF. Because we detect EOL sequences here and |
| // put them back immediately, we can use getCharIgnoreEOL(). |
| while ((c = getCharIgnoreEOL()) != untilChar) { |
| if (c == EOF) { |
| ungetCharIgnoreEOL(c); |
| reportError(JSMSG_UNTERMINATED_STRING); |
| return false; |
| } |
| |
| if (c == '\\') { |
| switch (c = getChar()) { |
| case 'b': c = '\b'; break; |
| case 'f': c = '\f'; break; |
| case 'n': c = '\n'; break; |
| case 'r': c = '\r'; break; |
| case 't': c = '\t'; break; |
| case 'v': c = '\v'; break; |
| |
| case '\n': |
| // ES5 7.8.4: an escaped line terminator represents |
| // no character. |
| continue; |
| |
| // Unicode character specification. |
| case 'u': { |
| if (peekChar() == '{') { |
| uint32_t code; |
| if (!getBracedUnicode(&code)) { |
| reportError(JSMSG_MALFORMED_ESCAPE, "Unicode"); |
| return false; |
| } |
| |
| MOZ_ASSERT(code <= 0x10FFFF); |
| if (code < 0x10000) { |
| c = code; |
| } else { |
| if (!tokenbuf.append((code - 0x10000) / 1024 + 0xD800)) |
| return false; |
| c = ((code - 0x10000) % 1024) + 0xDC00; |
| } |
| break; |
| } |
| |
| char16_t cp[4]; |
| if (peekChars(4, cp) && |
| JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) |
| { |
| c = JS7_UNHEX(cp[0]); |
| c = (c << 4) + JS7_UNHEX(cp[1]); |
| c = (c << 4) + JS7_UNHEX(cp[2]); |
| c = (c << 4) + JS7_UNHEX(cp[3]); |
| skipChars(4); |
| } else { |
| reportError(JSMSG_MALFORMED_ESCAPE, "Unicode"); |
| return false; |
| } |
| break; |
| } |
| |
| // Hexadecimal character specification. |
| case 'x': { |
| char16_t cp[2]; |
| if (peekChars(2, cp) && JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) { |
| c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]); |
| skipChars(2); |
| } else { |
| reportError(JSMSG_MALFORMED_ESCAPE, "hexadecimal"); |
| return false; |
| } |
| break; |
| } |
| |
| default: |
| // Octal character specification. |
| if (JS7_ISOCT(c)) { |
| int32_t val = JS7_UNOCT(c); |
| |
| c = peekChar(); |
| |
| // Strict mode code allows only \0, then a non-digit. |
| if (val != 0 || JS7_ISDEC(c)) { |
| if (parsingTemplate) { |
| reportError(JSMSG_DEPRECATED_OCTAL); |
| return false; |
| } |
| if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL)) |
| return false; |
| flags.sawOctalEscape = true; |
| } |
| |
| if (JS7_ISOCT(c)) { |
| val = 8 * val + JS7_UNOCT(c); |
| getChar(); |
| c = peekChar(); |
| if (JS7_ISOCT(c)) { |
| int32_t save = val; |
| val = 8 * val + JS7_UNOCT(c); |
| if (val <= 0xFF) |
| getChar(); |
| else |
| val = save; |
| } |
| } |
| |
| c = char16_t(val); |
| } |
| break; |
| } |
| } else if (TokenBuf::isRawEOLChar(c)) { |
| if (!parsingTemplate) { |
| ungetCharIgnoreEOL(c); |
| reportError(JSMSG_UNTERMINATED_STRING); |
| return false; |
| } |
| if (c == '\r') { |
| c = '\n'; |
| if (userbuf.peekRawChar() == '\n') |
| skipChars(1); |
| } |
| updateLineInfoForEOL(); |
| updateFlagsForEOL(); |
| } else if (parsingTemplate && c == '$') { |
| if ((nc = getCharIgnoreEOL()) == '{') |
| break; |
| ungetCharIgnoreEOL(nc); |
| } |
| |
| if (!tokenbuf.append(c)) { |
| ReportOutOfMemory(cx); |
| return false; |
| } |
| } |
| |
| JSAtom* atom = atomize(cx, tokenbuf); |
| if (!atom) |
| return false; |
| |
| if (!parsingTemplate) { |
| (*tp)->type = TOK_STRING; |
| } else { |
| if (c == '$' && nc == '{') |
| (*tp)->type = TOK_TEMPLATE_HEAD; |
| else |
| (*tp)->type = TOK_NO_SUBS_TEMPLATE; |
| } |
| |
| (*tp)->setAtom(atom); |
| return true; |
| } |
| |
| JS_FRIEND_API(int) |
| js_fgets(char* buf, int size, FILE* file) |
| { |
| int n, i, c; |
| bool crflag; |
| |
| n = size - 1; |
| if (n < 0) |
| return -1; |
| |
| crflag = false; |
| for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) { |
| buf[i] = c; |
| if (c == '\n') { // any \n ends a line |
| i++; // keep the \n; we know there is room for \0 |
| break; |
| } |
| if (crflag) { // \r not followed by \n ends line at the \r |
| ungetc(c, file); |
| break; // and overwrite c in buf with \0 |
| } |
| crflag = (c == '\r'); |
| } |
| |
| buf[i] = '\0'; |
| return i; |
| } |
| |
| const char* |
| frontend::TokenKindToDesc(TokenKind tt) |
| { |
| switch (tt) { |
| #define EMIT_CASE(name, desc) case TOK_##name: return desc; |
| FOR_EACH_TOKEN_KIND(EMIT_CASE) |
| #undef EMIT_CASE |
| case TOK_LIMIT: |
| MOZ_ASSERT_UNREACHABLE("TOK_LIMIT should not be passed."); |
| break; |
| } |
| |
| return "<bad TokenKind>"; |
| } |
| |
#ifdef DEBUG
// Debug-only helper: return the enumerator name of |tt| as a string, e.g.
// "TOK_" followed by the kind's name.  TOK_LIMIT yields "<bad TokenKind>".
//
// NOTE(review): unlike TokenKindToDesc, this definition is not qualified with
// frontend:: -- confirm it matches the namespace of its declaration.
const char*
TokenKindToString(TokenKind tt)
{
    switch (tt) {
      case TOK_LIMIT:
        break;
#define TOKEN_NAME_CASE(name, desc) \
      case TOK_##name:              \
        return "TOK_" #name;
      FOR_EACH_TOKEN_KIND(TOKEN_NAME_CASE)
#undef TOKEN_NAME_CASE
    }

    return "<bad TokenKind>";
}
#endif