| // Copyright 2017 the V8 project authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef V8_ASMJS_ASM_SCANNER_H_ |
| #define V8_ASMJS_ASM_SCANNER_H_ |
| |
| #include <memory> |
| #include <string> |
| #include <unordered_map> |
| |
| #include "src/asmjs/asm-names.h" |
| #include "src/base/logging.h" |
| #include "src/common/globals.h" |
| |
| namespace v8 { |
| namespace internal { |
| |
| class Utf16CharacterStream; |
| |
| // A custom scanner to extract the token stream needed to parse valid |
| // asm.js: http://asmjs.org/spec/latest/ |
| // This scanner intentionally avoids the portions of JavaScript lexing |
| // that are not required to determine if code is valid asm.js code. |
| // * Strings are disallowed except for 'use asm'. |
| // * Only the subset of keywords needed to check asm.js invariants is |
| // included. |
| // * Identifiers are accumulated into local + global string tables |
| // (for performance). |
| class V8_EXPORT_PRIVATE AsmJsScanner { |
| public: |
| using token_t = int32_t; |
| |
| explicit AsmJsScanner(Utf16CharacterStream* stream); |
| |
| // Returns the current token (the value held in token_). |
| token_t Token() const { return token_; } |
| // Returns the position recorded for the current token (position_). |
| size_t Position() const { return position_; } |
| // Advance to the next token. |
| void Next(); |
| // Back up by one token. |
| void Rewind(); |
| |
| // Get raw string for current identifier. The returned reference becomes |
| // invalid when the scanner advances; create a copy to preserve it. |
| const std::string& GetIdentifierString() const { |
| // Identifier strings don't work after a rewind, so assert we are not in one. |
| DCHECK(!rewind_); |
| return identifier_string_; |
| } |
| |
| // Check if we just passed a newline. |
| bool IsPrecededByNewline() const { |
| // Newline tracking doesn't work if you back up, so assert no rewind. |
| DCHECK(!rewind_); |
| return preceded_by_newline_; |
| } |
| |
| #if DEBUG |
| // Debug only method to go from a token back to its name. |
| // Slow, only use for debugging. |
| std::string Name(token_t token) const; |
| #endif |
| |
| // Restores old position (token after that position). Rewinding right after |
| // a seek is not allowed, because the previous tokens are unknown. |
| void Seek(size_t pos); |
| |
| // Select whether identifiers are resolved in global or local scope, |
| // and which scope new identifiers are added to (toggles in_local_scope_). |
| void EnterLocalScope() { in_local_scope_ = true; } |
| void EnterGlobalScope() { in_local_scope_ = false; } |
| // Drop all current local identifiers. |
| void ResetLocals(); |
| |
| // A token is a local if <= kLocalsStart and a global if >= kGlobalsStart. |
| bool IsLocal() const { return IsLocal(Token()); } |
| bool IsGlobal() const { return IsGlobal(Token()); } |
| static bool IsLocal(token_t token) { return token <= kLocalsStart; } |
| static bool IsGlobal(token_t token) { return token >= kGlobalsStart; } |
| // Methods to find the index position of an identifier. Counting starts from |
| // 0 in each scope separately: kLocalsStart and kGlobalsStart have index 0. |
| static size_t LocalIndex(token_t token) { |
| DCHECK(IsLocal(token)); |
| return -(token - kLocalsStart); |
| } |
| static size_t GlobalIndex(token_t token) { |
| DCHECK(IsGlobal(token)); |
| return token - kGlobalsStart; |
| } |
| |
| // Methods to check if the current token is a numeric literal considered an |
| // asm.js "double" (contains a dot) or an "unsigned" (without a dot). Note |
| // that numbers without a dot outside the [0 .. 2^32) range are errors. |
| bool IsUnsigned() const { return Token() == kUnsigned; } |
| uint32_t AsUnsigned() const { |
| DCHECK(IsUnsigned()); |
| return unsigned_value_; |
| } |
| bool IsDouble() const { return Token() == kDouble; } |
| double AsDouble() const { |
| DCHECK(IsDouble()); |
| return double_value_; |
| } |
| |
| // clang-format off |
| enum { |
| // [-10000-kMaxIdentifierCount .. -10000] :: Local identifiers (counting |
| // backwards; -10000 is local 0) |
| // (-10000 .. -1) :: Builtin tokens like keywords |
| // (also includes some special |
| // ones like end of input) |
| // 0 .. 255 :: Single char tokens |
| // 256 .. 256+kMaxIdentifierCount :: Global identifiers |
| kLocalsStart = -10000, |
| #define V(name, _junk1, _junk2, _junk3) kToken_##name, |
| STDLIB_MATH_FUNCTION_LIST(V) |
| STDLIB_ARRAY_TYPE_LIST(V) |
| #undef V |
| #define V(name, _junk1) kToken_##name, |
| STDLIB_MATH_VALUE_LIST(V) |
| #undef V |
| #define V(name) kToken_##name, |
| STDLIB_OTHER_LIST(V) |
| KEYWORD_NAME_LIST(V) |
| #undef V |
| #define V(rawname, name) kToken_##name, |
| LONG_SYMBOL_NAME_LIST(V) |
| #undef V |
| #define V(name, value, string_name) name = value, |
| SPECIAL_TOKEN_LIST(V) |
| #undef V |
| kGlobalsStart = 256, |
| }; |
| // clang-format on |
| |
| static constexpr uc32 kEndOfInputU = static_cast<uc32>(kEndOfInput); |
| |
| private: |
| Utf16CharacterStream* stream_; |
| token_t token_; |
| token_t preceding_token_; |
| token_t next_token_; // Only set when in {rewind} state. |
| size_t position_; // Corresponds to {token} position. |
| size_t preceding_position_; // Corresponds to {preceding_token} position. |
| size_t next_position_; // Only set when in {rewind} state. |
| bool rewind_; |
| std::string identifier_string_; |
| bool in_local_scope_; |
| std::unordered_map<std::string, token_t> local_names_; |
| std::unordered_map<std::string, token_t> global_names_; |
| std::unordered_map<std::string, token_t> property_names_; |
| int global_count_; |
| double double_value_; |
| uint32_t unsigned_value_; |
| bool preceded_by_newline_; |
| |
| // Consume multiple characters. |
| void ConsumeIdentifier(uc32 ch); |
| void ConsumeNumber(uc32 ch); |
| bool ConsumeCComment(); |
| void ConsumeCPPComment(); |
| void ConsumeString(uc32 quote); |
| void ConsumeCompareOrShift(uc32 ch); |
| |
| // Classify character categories. |
| bool IsIdentifierStart(uc32 ch); |
| bool IsIdentifierPart(uc32 ch); |
| bool IsNumberStart(uc32 ch); |
| }; |
| |
| } // namespace internal |
| } // namespace v8 |
| |
| #endif // V8_ASMJS_ASM_SCANNER_H_ |