| // Copyright 2019 the V8 project authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef V8_CCTEST_DISASM_REGEX_HELPER_H_ |
| #define V8_CCTEST_DISASM_REGEX_HELPER_H_ |
| |
| #include <algorithm> |
| #include <iostream> |
| #include <map> |
| #include <regex> // NOLINT(build/c++11) |
| #include <string> |
| #include <vector> |
| |
| #include "src/base/logging.h" |
| #include "src/base/macros.h" |
| |
| namespace v8 { |
| namespace internal { |
| |
// This class provides methods for regular expression matching with an extra
// feature of user defined named capture groups which are alive across
// regex search calls.
//
// The main use case for the class is to test multiple-line assembly
// output with an ability to express dataflow or dependencies by allowing single
// definition / multiple use symbols. When processing output lines and trying to
// match them against the set of patterns a user can define a named group - a
// symbol - and a regex for matching it. If the regex with the definitions is
// matched then whenever this symbol appears again (no redefinitions though) in
// the following patterns the parser will replace the symbol reference in the
// pattern by the actual literal value matched during processing of the symbol
// definition. This effectively checks that all of the output lines have
// the same literal for the described symbol. To track the symbols this class
// implements a simple single-definition symbol table.
//
// Example: Let's consider a case when we want to test that the assembly
// output consists of two instructions - a load and a store; we also want
// to check that the loaded value is used as store value for the store,
// like here:
//
//    ldr x3, [x4]
//    str x3, [x5]
//
// Using special syntax for symbol definitions and uses one could write the
// following patterns making sure that the load register is used by the store
// (the literal square brackets have to be escaped because the patterns are
// regular expressions):
//
//    'ldr <<NamedReg:x[0-9]+>>, \[x[0-9]+\]'
//    'str <<NamedReg>>, \[x[0-9]+\]'
//
// See 'ProcessPattern' for more details.
class RegexParser {
 public:
  RegexParser()
      // Regex to parse symbol references: definitions or uses.
      // <<SymbolName[:'def regex']>>
      : symbol_ref_regex_("<<([a-zA-Z_][a-zA-Z0-9_]*)(?::(.*?))?>>") {}

  // Status codes used for return values and error diagnostics.
  enum class Status {
    kSuccess = 0,
    kNotMatched,
    kWrongPattern,
    kDefNotFound,
    kRedefinition,
  };

  // This class holds info on a symbol definition.
  class SymbolInfo {
   public:
    explicit SymbolInfo(const std::string& matched_value)
        : matched_value_(matched_value) {}

    // Returns an actual matched value for the symbol.
    const std::string& matched_value() const { return matched_value_; }

   private:
    std::string matched_value_;
  };

  // This class holds temporary info on a symbol while processing an input line.
  class SymbolVectorElem {
   public:
    SymbolVectorElem(bool is_def, const std::string& symbol_name)
        : is_def_(is_def), symbol_name_(symbol_name) {}

    bool is_def() const { return is_def_; }
    const std::string& symbol_name() const { return symbol_name_; }

   private:
    bool is_def_;
    std::string symbol_name_;
  };

  using SymbolMap = std::map<std::string, SymbolInfo>;
  using MatchVector = std::vector<SymbolVectorElem>;

  // Tries to match (actually search, similar to std::regex_search) the line
  // against the pattern (possibly containing symbols references) and if
  // matched commits symbols definitions from the pattern to the symbol table.
  //
  // Returns: status of the matching attempt.
  //   * kSuccess      - the line matched; definitions were committed.
  //   * kNotMatched   - the processed pattern was not found in the line.
  //   * kWrongPattern - the pattern matched but contains capture groups other
  //                     than the ones generated for symbol references.
  //   * kDefNotFound  - the pattern uses a symbol that is not in the table.
  //   * kRedefinition - the pattern defines a symbol that is already in the
  //                     table or that is referenced earlier in the pattern.
  //
  // Important: the format of pattern regexs is based on std::ECMAScript syntax
  // (http://www.cplusplus.com/reference/regex/ECMAScript/) with a few extra
  // restrictions:
  //   * no backreference (or submatch) groups
  //     - when a group (e.g. "(a|b)+") is needed use a passive group
  //       (e.g. "(?:a|b)+").
  //   * special syntax for symbol definitions: <<Name:regex>>
  //     - 'Name' must be a C-style variable name ([a-zA-Z_][a-zA-Z0-9_]*).
  //     - 'regex' - is a regex for the actual literal expected in the symbol
  //       definition line. It must not contain any symbol references.
  //   * special syntax for symbol uses <<Name>>
  //     - a use is replaced by the literal text matched by the definition;
  //       regex metacharacters in that text are escaped, so the use matches
  //       exactly the same characters.
  //
  // Semantical restrictions on symbols references:
  //   * symbols mustn't be referenced before they are defined.
  //     - a pattern R1 which uses symbol 'A' mustn't be processed if a pattern
  //       R2 with the symbol 'A' definition hasn't been yet matched (R1!=R2).
  //     - A pattern mustn't define a symbol and use it inside the same regex.
  //   * symbols mustn't be redefined.
  //     - if a line has been matched against a pattern R1 with symbol 'A'
  //       then other patterns mustn't define symbol 'A'.
  //   * symbols definitions are only committed and registered if the whole
  //     pattern is successfully matched.
  //
  // Notes:
  //   * A pattern may contain uses of the same or different symbols and
  //     definitions of different symbols however if a symbol is defined in the
  //     pattern it can't be used in the same pattern.
  //
  // Pattern example: "<<A:[0-9]+>> <<B>>, <<B>> <<C:[a-z]+>>" (assuming 'B' is
  // defined and matched).
  Status ProcessPattern(const std::string& line, const std::string& pattern) {
    // Processed pattern which is going to be used for std::regex_search; symbol
    // references are replaced accordingly to the reference type - def or use.
    std::string final_pattern;
    // A vector of records for symbols references in the pattern. The format is
    // {is_definition, symbol_name}.
    MatchVector symbols_refs;
    Status status =
        ParseSymbolsInPattern(pattern, &final_pattern, &symbols_refs);
    if (status != Status::kSuccess) {
      return status;
    }

    std::smatch match;
    if (!std::regex_search(line, match, std::regex(final_pattern))) {
      return Status::kNotMatched;
    }

    // Every symbol reference contributes exactly one capture group, so any
    // difference means the user pattern contained capture groups of its own.
    if (symbols_refs.size() != (match.size() - 1)) {
      return Status::kWrongPattern;
    }

    status = CheckSymbolsMatchedValues(symbols_refs, match);
    if (status != Status::kSuccess) {
      return status;
    }

    CommitSymbolsDefinitions(symbols_refs, match);

    return Status::kSuccess;
  }

  // Returns whether a symbol is defined in the symbol table.
  bool IsSymbolDefined(const std::string& symbol_name) const {
    return map_.find(symbol_name) != std::end(map_);
  }

  // Returns the matched value for a symbol. The symbol must be defined.
  std::string GetSymbolMatchedValue(const std::string& symbol_name) const {
    DCHECK(IsSymbolDefined(symbol_name));
    return map_.find(symbol_name)->second.matched_value();
  }

  // Prints the symbol table, one '<<Name>>: "value"' entry per line.
  void PrintSymbols(std::ostream& os) const {
    os << "Printing symbol table..." << std::endl;
    for (const auto& entry : map_) {
      os << "<<" << entry.first << ">>: \"" << entry.second.matched_value()
         << "\"" << std::endl;
    }
  }

 protected:
  // Fixed layout for the symbol reference match.
  enum SymbolMatchIndex {
    kFullSubmatch = 0,
    kName = 1,
    kDefRegex = 2,
    kSize = kDefRegex + 1,
  };

  // Returns a copy of |literal| in which every character that has a special
  // meaning in the ECMAScript regex grammar is preceded by a backslash, so
  // that the result matches |literal| character for character.
  static std::string EscapeRegexLiteral(const std::string& literal) {
    std::string escaped;
    escaped.reserve(literal.size());
    for (const char c : literal) {
      switch (c) {
        case '\\':
        case '^':
        case '$':
        case '.':
        case '|':
        case '?':
        case '*':
        case '+':
        case '(':
        case ')':
        case '[':
        case ']':
        case '{':
        case '}':
          escaped.push_back('\\');
          break;
        default:
          break;
      }
      escaped.push_back(c);
    }
    return escaped;
  }

  // Processes a symbol reference: for definitions it adds the symbol regex, for
  // uses it adds the (escaped) literal from a previously matched definition.
  // In both cases the added text is wrapped into a capture group and a record
  // is appended to the symbol references vector.
  //
  // Returns kRedefinition when a definition names a symbol already referenced
  // in the same pattern, kDefNotFound when a use names a symbol missing from
  // the symbol table, kSuccess otherwise.
  Status ProcessSymbol(const std::smatch& match, MatchVector* symbols_refs,
                       std::string* new_pattern) const {
    const std::ssub_match& def_regex = match[SymbolMatchIndex::kDefRegex];
    // A reference is a definition only if its regex part is non-empty; a
    // reference with an empty regex part ("<<A:>>") is handled as a use.
    const bool is_def = def_regex.length() != 0;
    const std::string symbol_name = match[SymbolMatchIndex::kName].str();

    if (is_def) {
      // Make sure the symbol isn't already referenced in this pattern.
      const bool already_referenced =
          std::any_of(symbols_refs->begin(), symbols_refs->end(),
                      [&symbol_name](const SymbolVectorElem& ref) {
                        return ref.symbol_name() == symbol_name;
                      });
      if (already_referenced) {
        return Status::kRedefinition;
      }

      symbols_refs->emplace_back(true, symbol_name);
      new_pattern->append("(");
      new_pattern->append(def_regex.first, def_regex.second);
      new_pattern->append(")");
    } else {
      auto symbol_map_iter = map_.find(symbol_name);
      if (symbol_map_iter == std::end(map_)) {
        return Status::kDefNotFound;
      }

      // The stored value is plain text, not a regex: escape it so that
      // characters such as '[', '.' or '+' only match themselves.
      const SymbolInfo& sym_info = symbol_map_iter->second;
      new_pattern->append("(");
      new_pattern->append(EscapeRegexLiteral(sym_info.matched_value()));
      new_pattern->append(")");

      symbols_refs->emplace_back(false, symbol_name);
    }
    return Status::kSuccess;
  }

  // Parses the input pattern regex, processes symbols defs and uses inside
  // it, fills a raw pattern used for std::regex_search. Text between symbol
  // references is copied to the raw pattern unchanged.
  Status ParseSymbolsInPattern(const std::string& pattern,
                               std::string* raw_pattern,
                               MatchVector* symbols_refs) const {
    std::string::const_iterator low = pattern.cbegin();
    const std::string::const_iterator high = pattern.cend();
    std::smatch match;

    while (low != high) {
      // Search for a symbol reference.
      if (!std::regex_search(low, high, match, symbol_ref_regex_)) {
        // No more references: the rest of the pattern is copied verbatim.
        raw_pattern->append(low, high);
        break;
      }

      if (match.size() != SymbolMatchIndex::kSize) {
        return Status::kWrongPattern;
      }

      // Copy the text preceding the reference.
      raw_pattern->append(match.prefix().first, match.prefix().second);

      const Status status = ProcessSymbol(match, symbols_refs, raw_pattern);
      if (status != Status::kSuccess) {
        return status;
      }
      // Continue right after the reference that has just been processed.
      low = match[SymbolMatchIndex::kFullSubmatch].second;
    }
    return Status::kSuccess;
  }

  // Checks that there are no symbol redefinitions and the symbols uses matched
  // literal values are equal to corresponding matched definitions.
  Status CheckSymbolsMatchedValues(const MatchVector& symbols_refs,
                                   const std::smatch& match) const {
    // There is a one-to-one correspondence between matched subexpressions and
    // symbols references in the vector (by construction); submatch 0 is the
    // whole match, hence the "+ 1" below.
    for (size_t vec_pos = 0, size = symbols_refs.size(); vec_pos < size;
         vec_pos++) {
      const SymbolVectorElem& elem = symbols_refs[vec_pos];
      auto map_iter = map_.find(elem.symbol_name());
      if (elem.is_def()) {
        if (map_iter != std::end(map_)) {
          return Status::kRedefinition;
        }
      } else {
        DCHECK(map_iter != std::end(map_));
        // We replaced use with matched definition value literal.
        DCHECK_EQ(
            map_iter->second.matched_value().compare(match[vec_pos + 1].str()),
            0);
      }
    }
    return Status::kSuccess;
  }

  // Commits symbols definitions and their matched values to the symbol table.
  // Uses are skipped; only definition records add entries.
  void CommitSymbolsDefinitions(const MatchVector& groups_vector,
                                const std::smatch& match) {
    for (size_t vec_pos = 0, size = groups_vector.size(); vec_pos < size;
         vec_pos++) {
      const size_t match_pos = vec_pos + 1;
      const SymbolVectorElem& elem = groups_vector[vec_pos];
      if (elem.is_def()) {
        auto emplace_res = map_.emplace(elem.symbol_name(),
                                        SymbolInfo(match[match_pos].str()));
        USE(emplace_res);  // Silence warning about unused variable.
        DCHECK(emplace_res.second);
      }
    }
  }

  const std::regex symbol_ref_regex_;
  SymbolMap map_;
};
| |
// Declaration only; the definition is not part of this header.
// Takes a function name and a list of pattern strings and returns a bool.
// NOTE(review): presumably obtains the disassembly of the function called
// |function_name| and checks it against |patterns_array| with RegexParser,
// returning true when the patterns match - confirm against the definition.
| bool CheckDisassemblyRegexPatterns( |
| const char* function_name, const std::vector<std::string>& patterns_array); |
| |
| } // namespace internal |
| } // namespace v8 |
| |
| #endif // V8_CCTEST_DISASM_REGEX_HELPER_H_ |