blob: 9e7250ff2cd72fbc9b6e6440103f07d3b11d6a13 [file] [log] [blame]
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_ASMJS_ASM_SCANNER_H_
#define V8_ASMJS_ASM_SCANNER_H_
#include <memory>
#include <string>
#include <unordered_map>
#include "src/asmjs/asm-names.h"
#include "src/base/logging.h"
#include "src/common/globals.h"
namespace v8 {
namespace internal {
class Utf16CharacterStream;
// A custom scanner to extract the token stream needed to parse valid
// asm.js: http://asmjs.org/spec/latest/
// This scanner intentionally avoids the portions of JavaScript lexing
// that are not required to determine if code is valid asm.js code.
// * Strings are disallowed except for 'use asm'.
// * Only the subset of keywords needed to check asm.js invariants is
// included.
// * Identifiers are accumulated into local + global string tables
// (for performance).
class V8_EXPORT_PRIVATE AsmJsScanner {
public:
using token_t = int32_t;
explicit AsmJsScanner(Utf16CharacterStream* stream);
// Get current token.
token_t Token() const { return token_; }
// Get position of current token.
size_t Position() const { return position_; }
// Advance to the next token.
void Next();
// Back up by one token.
void Rewind();
// Get raw string for current identifier. Note that the returned string will
// become invalid when the scanner advances, create a copy to preserve it.
const std::string& GetIdentifierString() const {
// Identifier strings don't work after a rewind.
DCHECK(!rewind_);
return identifier_string_;
}
// Check if we just passed a newline.
bool IsPrecededByNewline() const {
// Newline tracking doesn't work if you back up.
DCHECK(!rewind_);
return preceded_by_newline_;
}
#if DEBUG
// Debug only method to go from a token back to its name.
// Slow, only use for debugging.
std::string Name(token_t token) const;
#endif
// Restores old position (token after that position). Note that it is not
// allowed to rewind right after a seek, because previous tokens are unknown.
void Seek(size_t pos);
// Select whether identifiers are resolved in global or local scope,
// and which scope new identifiers are added to.
void EnterLocalScope() { in_local_scope_ = true; }
void EnterGlobalScope() { in_local_scope_ = false; }
// Drop all current local identifiers.
void ResetLocals();
// Methods to check if a token is an identifier and which scope.
bool IsLocal() const { return IsLocal(Token()); }
bool IsGlobal() const { return IsGlobal(Token()); }
static bool IsLocal(token_t token) { return token <= kLocalsStart; }
static bool IsGlobal(token_t token) { return token >= kGlobalsStart; }
// Methods to find the index position of an identifier (count starting from
// 0 for each scope separately).
static size_t LocalIndex(token_t token) {
DCHECK(IsLocal(token));
return -(token - kLocalsStart);
}
static size_t GlobalIndex(token_t token) {
DCHECK(IsGlobal(token));
return token - kGlobalsStart;
}
// Methods to check if the current token is a numeric literal considered an
// asm.js "double" (contains a dot) or an "unsigned" (without a dot). Note
// that numbers without a dot outside the [0 .. 2^32) range are errors.
bool IsUnsigned() const { return Token() == kUnsigned; }
uint32_t AsUnsigned() const {
DCHECK(IsUnsigned());
return unsigned_value_;
}
bool IsDouble() const { return Token() == kDouble; }
double AsDouble() const {
DCHECK(IsDouble());
return double_value_;
}
// clang-format off
enum {
// [-10000-kMaxIdentifierCount, -10000) :: Local identifiers (counting
// backwards)
// [-10000 .. -1) :: Builtin tokens like keywords
// (also includes some special
// ones like end of input)
// 0 .. 255 :: Single char tokens
// 256 .. 256+kMaxIdentifierCount :: Global identifiers
kLocalsStart = -10000,
#define V(name, _junk1, _junk2, _junk3) kToken_##name,
STDLIB_MATH_FUNCTION_LIST(V)
STDLIB_ARRAY_TYPE_LIST(V)
#undef V
#define V(name, _junk1) kToken_##name,
STDLIB_MATH_VALUE_LIST(V)
#undef V
#define V(name) kToken_##name,
STDLIB_OTHER_LIST(V)
KEYWORD_NAME_LIST(V)
#undef V
#define V(rawname, name) kToken_##name,
LONG_SYMBOL_NAME_LIST(V)
#undef V
#define V(name, value, string_name) name = value,
SPECIAL_TOKEN_LIST(V)
#undef V
kGlobalsStart = 256,
};
// clang-format on
static constexpr uc32 kEndOfInputU = static_cast<uc32>(kEndOfInput);
private:
Utf16CharacterStream* stream_;
token_t token_;
token_t preceding_token_;
token_t next_token_; // Only set when in {rewind} state.
size_t position_; // Corresponds to {token} position.
size_t preceding_position_; // Corresponds to {preceding_token} position.
size_t next_position_; // Only set when in {rewind} state.
bool rewind_;
std::string identifier_string_;
bool in_local_scope_;
std::unordered_map<std::string, token_t> local_names_;
std::unordered_map<std::string, token_t> global_names_;
std::unordered_map<std::string, token_t> property_names_;
int global_count_;
double double_value_;
uint32_t unsigned_value_;
bool preceded_by_newline_;
// Consume multiple characters.
void ConsumeIdentifier(uc32 ch);
void ConsumeNumber(uc32 ch);
bool ConsumeCComment();
void ConsumeCPPComment();
void ConsumeString(uc32 quote);
void ConsumeCompareOrShift(uc32 ch);
// Classify character categories.
bool IsIdentifierStart(uc32 ch);
bool IsIdentifierPart(uc32 ch);
bool IsNumberStart(uc32 ch);
};
} // namespace internal
} // namespace v8
#endif // V8_ASMJS_ASM_SCANNER_H_