| //===-- GoLexer.cpp ---------------------------------------------*- C++ -*-===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include <string.h> |
| |
| #include "GoLexer.h" |
| |
| using namespace lldb_private; |
| |
| llvm::StringMap<GoLexer::TokenType> *GoLexer::m_keywords; |
| |
| GoLexer::GoLexer(const char *src) |
| : m_src(src), m_end(src + strlen(src)), m_last_token(TOK_INVALID, "") {} |
| |
| bool GoLexer::SkipWhitespace() { |
| bool saw_newline = false; |
| for (; m_src < m_end; ++m_src) { |
| if (*m_src == '\n') |
| saw_newline = true; |
| if (*m_src == '/' && !SkipComment()) |
| return saw_newline; |
| else if (!IsWhitespace(*m_src)) |
| return saw_newline; |
| } |
| return saw_newline; |
| } |
| |
| bool GoLexer::SkipComment() { |
| if (m_src[0] == '/' && m_src[1] == '/') { |
| for (const char *c = m_src + 2; c < m_end; ++c) { |
| if (*c == '\n') { |
| m_src = c - 1; |
| return true; |
| } |
| } |
| return true; |
| } else if (m_src[0] == '/' && m_src[1] == '*') { |
| for (const char *c = m_src + 2; c < m_end; ++c) { |
| if (c[0] == '*' && c[1] == '/') { |
| m_src = c + 1; |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| |
| const GoLexer::Token &GoLexer::Lex() { |
| bool newline = SkipWhitespace(); |
| const char *start = m_src; |
| m_last_token.m_type = InternalLex(newline); |
| m_last_token.m_value = llvm::StringRef(start, m_src - start); |
| return m_last_token; |
| } |
| |
| GoLexer::TokenType GoLexer::InternalLex(bool newline) { |
| if (m_src >= m_end) { |
| return TOK_EOF; |
| } |
| if (newline) { |
| switch (m_last_token.m_type) { |
| case TOK_IDENTIFIER: |
| case LIT_FLOAT: |
| case LIT_IMAGINARY: |
| case LIT_INTEGER: |
| case LIT_RUNE: |
| case LIT_STRING: |
| case KEYWORD_BREAK: |
| case KEYWORD_CONTINUE: |
| case KEYWORD_FALLTHROUGH: |
| case KEYWORD_RETURN: |
| case OP_PLUS_PLUS: |
| case OP_MINUS_MINUS: |
| case OP_RPAREN: |
| case OP_RBRACK: |
| case OP_RBRACE: |
| return OP_SEMICOLON; |
| default: |
| break; |
| } |
| } |
| char c = *m_src; |
| switch (c) { |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| return DoNumber(); |
| case '+': |
| case '-': |
| case '*': |
| case '/': |
| case '%': |
| case '&': |
| case '|': |
| case '^': |
| case '<': |
| case '>': |
| case '!': |
| case ':': |
| case ';': |
| case '(': |
| case ')': |
| case '[': |
| case ']': |
| case '{': |
| case '}': |
| case ',': |
| case '=': |
| return DoOperator(); |
| case '.': |
| if (IsDecimal(m_src[1])) |
| return DoNumber(); |
| return DoOperator(); |
| case '$': |
| // For lldb persistent vars. |
| return DoIdent(); |
| case '"': |
| case '`': |
| return DoString(); |
| case '\'': |
| return DoRune(); |
| default: |
| break; |
| } |
| if (IsLetterOrDigit(c)) |
| return DoIdent(); |
| ++m_src; |
| return TOK_INVALID; |
| } |
| |
| GoLexer::TokenType GoLexer::DoOperator() { |
| TokenType t = TOK_INVALID; |
| if (m_end - m_src > 2) { |
| t = LookupKeyword(llvm::StringRef(m_src, 3)); |
| if (t != TOK_INVALID) |
| m_src += 3; |
| } |
| if (t == TOK_INVALID && m_end - m_src > 1) { |
| t = LookupKeyword(llvm::StringRef(m_src, 2)); |
| if (t != TOK_INVALID) |
| m_src += 2; |
| } |
| if (t == TOK_INVALID) { |
| t = LookupKeyword(llvm::StringRef(m_src, 1)); |
| ++m_src; |
| } |
| return t; |
| } |
| |
| GoLexer::TokenType GoLexer::DoIdent() { |
| const char *start = m_src++; |
| while (m_src < m_end && IsLetterOrDigit(*m_src)) { |
| ++m_src; |
| } |
| TokenType kw = LookupKeyword(llvm::StringRef(start, m_src - start)); |
| if (kw != TOK_INVALID) |
| return kw; |
| return TOK_IDENTIFIER; |
| } |
| |
| GoLexer::TokenType GoLexer::DoNumber() { |
| if (m_src[0] == '0' && (m_src[1] == 'x' || m_src[1] == 'X')) { |
| m_src += 2; |
| while (IsHexChar(*m_src)) |
| ++m_src; |
| return LIT_INTEGER; |
| } |
| bool dot_ok = true; |
| bool e_ok = true; |
| while (true) { |
| while (IsDecimal(*m_src)) |
| ++m_src; |
| switch (*m_src) { |
| case 'i': |
| ++m_src; |
| return LIT_IMAGINARY; |
| case '.': |
| if (!dot_ok) |
| return LIT_FLOAT; |
| ++m_src; |
| dot_ok = false; |
| break; |
| case 'e': |
| case 'E': |
| if (!e_ok) |
| return LIT_FLOAT; |
| dot_ok = e_ok = false; |
| ++m_src; |
| if (*m_src == '+' || *m_src == '-') |
| ++m_src; |
| break; |
| default: |
| if (dot_ok) |
| return LIT_INTEGER; |
| return LIT_FLOAT; |
| } |
| } |
| } |
| |
| GoLexer::TokenType GoLexer::DoRune() { |
| while (++m_src < m_end) { |
| switch (*m_src) { |
| case '\'': |
| ++m_src; |
| return LIT_RUNE; |
| case '\n': |
| return TOK_INVALID; |
| case '\\': |
| if (m_src[1] == '\n') |
| return TOK_INVALID; |
| ++m_src; |
| } |
| } |
| return TOK_INVALID; |
| } |
| |
| GoLexer::TokenType GoLexer::DoString() { |
| if (*m_src == '`') { |
| while (++m_src < m_end) { |
| if (*m_src == '`') { |
| ++m_src; |
| return LIT_STRING; |
| } |
| } |
| return TOK_INVALID; |
| } |
| while (++m_src < m_end) { |
| switch (*m_src) { |
| case '"': |
| ++m_src; |
| return LIT_STRING; |
| case '\n': |
| return TOK_INVALID; |
| case '\\': |
| if (m_src[1] == '\n') |
| return TOK_INVALID; |
| ++m_src; |
| } |
| } |
| return TOK_INVALID; |
| } |
| |
| GoLexer::TokenType GoLexer::LookupKeyword(llvm::StringRef id) { |
| if (m_keywords == nullptr) |
| m_keywords = InitKeywords(); |
| const auto &it = m_keywords->find(id); |
| if (it == m_keywords->end()) |
| return TOK_INVALID; |
| return it->second; |
| } |
| |
| llvm::StringRef GoLexer::LookupToken(TokenType t) { |
| if (m_keywords == nullptr) |
| m_keywords = InitKeywords(); |
| for (const auto &e : *m_keywords) { |
| if (e.getValue() == t) |
| return e.getKey(); |
| } |
| return ""; |
| } |
| |
| llvm::StringMap<GoLexer::TokenType> *GoLexer::InitKeywords() { |
| auto &result = *new llvm::StringMap<TokenType>(128); |
| result["break"] = KEYWORD_BREAK; |
| result["default"] = KEYWORD_DEFAULT; |
| result["func"] = KEYWORD_FUNC; |
| result["interface"] = KEYWORD_INTERFACE; |
| result["select"] = KEYWORD_SELECT; |
| result["case"] = KEYWORD_CASE; |
| result["defer"] = KEYWORD_DEFER; |
| result["go"] = KEYWORD_GO; |
| result["map"] = KEYWORD_MAP; |
| result["struct"] = KEYWORD_STRUCT; |
| result["chan"] = KEYWORD_CHAN; |
| result["else"] = KEYWORD_ELSE; |
| result["goto"] = KEYWORD_GOTO; |
| result["package"] = KEYWORD_PACKAGE; |
| result["switch"] = KEYWORD_SWITCH; |
| result["const"] = KEYWORD_CONST; |
| result["fallthrough"] = KEYWORD_FALLTHROUGH; |
| result["if"] = KEYWORD_IF; |
| result["range"] = KEYWORD_RANGE; |
| result["type"] = KEYWORD_TYPE; |
| result["continue"] = KEYWORD_CONTINUE; |
| result["for"] = KEYWORD_FOR; |
| result["import"] = KEYWORD_IMPORT; |
| result["return"] = KEYWORD_RETURN; |
| result["var"] = KEYWORD_VAR; |
| result["+"] = OP_PLUS; |
| result["-"] = OP_MINUS; |
| result["*"] = OP_STAR; |
| result["/"] = OP_SLASH; |
| result["%"] = OP_PERCENT; |
| result["&"] = OP_AMP; |
| result["|"] = OP_PIPE; |
| result["^"] = OP_CARET; |
| result["<<"] = OP_LSHIFT; |
| result[">>"] = OP_RSHIFT; |
| result["&^"] = OP_AMP_CARET; |
| result["+="] = OP_PLUS_EQ; |
| result["-="] = OP_MINUS_EQ; |
| result["*="] = OP_STAR_EQ; |
| result["/="] = OP_SLASH_EQ; |
| result["%="] = OP_PERCENT_EQ; |
| result["&="] = OP_AMP_EQ; |
| result["|="] = OP_PIPE_EQ; |
| result["^="] = OP_CARET_EQ; |
| result["<<="] = OP_LSHIFT_EQ; |
| result[">>="] = OP_RSHIFT_EQ; |
| result["&^="] = OP_AMP_CARET_EQ; |
| result["&&"] = OP_AMP_AMP; |
| result["||"] = OP_PIPE_PIPE; |
| result["<-"] = OP_LT_MINUS; |
| result["++"] = OP_PLUS_PLUS; |
| result["--"] = OP_MINUS_MINUS; |
| result["=="] = OP_EQ_EQ; |
| result["<"] = OP_LT; |
| result[">"] = OP_GT; |
| result["="] = OP_EQ; |
| result["!"] = OP_BANG; |
| result["!="] = OP_BANG_EQ; |
| result["<="] = OP_LT_EQ; |
| result[">="] = OP_GT_EQ; |
| result[":="] = OP_COLON_EQ; |
| result["..."] = OP_DOTS; |
| result["("] = OP_LPAREN; |
| result["["] = OP_LBRACK; |
| result["{"] = OP_LBRACE; |
| result[","] = OP_COMMA; |
| result["."] = OP_DOT; |
| result[")"] = OP_RPAREN; |
| result["]"] = OP_RBRACK; |
| result["}"] = OP_RBRACE; |
| result[";"] = OP_SEMICOLON; |
| result[":"] = OP_COLON; |
| return &result; |
| } |