| // Copyright 2021 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "base/debug/dwarf_line_no.h" |
| |
| #include "base/memory/raw_ref.h" |
| |
| #ifdef USE_SYMBOLIZE |
| #include <algorithm> |
| #include <cstdint> |
| #include <limits> |
| |
| #include <string.h> |
| #include <unistd.h> |
| |
| #include "base/debug/buffered_dwarf_reader.h" |
| #include "base/debug/stack_trace.h" |
| #include "base/memory/raw_ptr.h" |
| #include "base/third_party/symbolize/symbolize.h" |
| |
| namespace base { |
| namespace debug { |
| |
| namespace { |
| |
// Largest representable offset. Used as an "unknown / unbounded" marker for
// file offsets in this file.
constexpr uint64_t kMaxOffset = std::numeric_limits<uint64_t>::max();

// These numbers are suitable for most compilation units for chrome and
// content_shell. If a compilation unit has a bigger number of directories or
// filenames, the additional directories/filenames will be ignored, and the
// stack frames pointing to these directories/filenames will not get line
// numbers. We can't set these numbers too big because they affect the size of
// ProgramInfo which is allocated on the stack.
constexpr int kMaxDirectories = 128;
constexpr size_t kMaxFilenames = 512;

// DWARF-4 line number program header, section 6.2.4.
//
// The scalar fields are zero-initialized so that a partially parsed header
// never exposes indeterminate values. The large offset tables are left
// uninitialized on purpose: only the first `num_directories` /
// `num_filenames` entries are meaningful.
struct ProgramInfo {
  uint64_t header_length = 0;
  // File offset of the first opcode of the line number program.
  uint64_t start_offset = 0;
  // File offset of the first byte after the line number program.
  uint64_t end_offset = 0;
  uint8_t minimum_instruction_length = 0;
  uint8_t maximum_operations_per_instruction = 0;
  uint8_t default_is_stmt = 0;
  int8_t line_base = 0;
  uint8_t line_range = 0;
  uint8_t opcode_base = 0;
  // Entry `i` holds the operand count of standard opcode `i + 1`.
  uint8_t standard_opcode_lengths[256] = {};
  uint8_t include_directories_table_offset = 0;
  uint8_t file_names_table_offset = 0;

  // Store the directories as offsets. Entry 0 is reserved, so the count
  // starts at 1.
  int num_directories = 1;
  uint64_t directory_offsets[kMaxDirectories];
  uint64_t directory_sizes[kMaxDirectories];

  // Store the file number table offsets. Entry 0 is reserved, so the count
  // starts at 1. These are `mutable` because the line number program itself
  // can append entries (DW_LNE_define_file) while it is being evaluated
  // through a const reference.
  mutable unsigned int num_filenames = 1;
  mutable uint64_t filename_offsets[kMaxFilenames];
  mutable uint8_t filename_dirs[kMaxFilenames];

  // Returns the "operation advance" encoded in a special opcode from which
  // `opcode_base` has already been subtracted. This calculation is taken from
  // section 6.2.5.1. Returns 0 when `line_range` is 0: that value comes
  // straight from the file and dividing by it would be undefined.
  unsigned int OpcodeToAdvance(uint8_t adjusted_opcode) const {
    if (line_range == 0) {
      return 0;
    }
    return static_cast<unsigned int>(adjusted_opcode) / line_range;
  }
};
| |
| // DWARF-4 line number program registers, section 6.2.2 |
| struct LineNumberRegisters { |
| // During the line number program evaluation, some instructions perform a |
| // "commit" which is when the registers have finished calculating a new row in |
| // the line-number table. This callback is executed and can be viewed as a |
| // iterator over all rows in the line number table. |
| class OnCommit { |
| public: |
| virtual void Do(LineNumberRegisters* registers) = 0; |
| }; |
| |
| raw_ptr<OnCommit> on_commit; |
| LineNumberRegisters(ProgramInfo info, OnCommit* on_commit) |
| : on_commit(on_commit), is_stmt(info.default_is_stmt) {} |
| |
| // Current program counter. |
| uintptr_t address = 0; |
| |
| // For VLIW architectures, the index of the operation in the VLIW instruction. |
| unsigned int op_index = 0; |
| |
| // Identifies the source file relating to the address in the DWARF File name |
| // table. |
| uint64_t file = 0; |
| |
| // Identifies the line number. Starts at 1. Can become 0 if instruction does |
| // not match any line in the file. |
| uint64_t line = 1; |
| |
| // Identifies the column within the source line. Starts at 1 though "0" |
| // also means "left edge" of the line. |
| uint64_t column = 0; |
| |
| // Boolean determining if this is a recommended spot for a breakpoint. |
| // Should be initialized by the program header. |
| bool is_stmt = false; |
| |
| // Indicates start of a basic block. |
| bool basic_block = false; |
| |
| // Indicates first byte after a sequence of machine instructions. |
| bool end_sequence = false; |
| |
| // Indicates this may be where execution should stop if trying to break for |
| // entering a function. |
| bool prologue_end = false; |
| |
| // Indicates this may be where execution should stop if trying to break for |
| // exiting a function. |
| bool epilogue_begin = false; |
| |
| // Identifier for the instruction set of the current address. |
| uint64_t isa = 0; |
| |
| // Identifies which block the current instruction belongs to. |
| uint64_t discriminator = 0; |
| |
| // Values from the previously committed line. See OnCommit interface for more |
| // details. This conceptually should be a copy of the whole |
| // LineNumberRegisters but since only 4 pieces of data are needed, hacking |
| // it inline was easier. |
| uintptr_t last_address = 0; |
| uint64_t last_file = 0; |
| uint64_t last_line = 0; |
| uint64_t last_column = 0; |
| |
| // This is the magical calculation for decompressing the line-number |
| // information. The `program_info` provides the parameters for the formula |
| // and the `op_advance` is the input value. See DWARF-4 sections 6.2.5.1 for |
| // the formula. |
| void OpAdvance(const ProgramInfo* program_info, uint64_t op_advance) { |
| address += program_info->minimum_instruction_length * |
| ((op_index + op_advance) / |
| program_info->maximum_operations_per_instruction); |
| |
| op_index = (op_index + op_advance) % |
| program_info->maximum_operations_per_instruction; |
| } |
| |
| // Committing a line means the calculation has landed on a stable set of |
| // values that represent an actual entry in the line number table. |
| void CommitLine() { |
| on_commit->Do(this); |
| |
| // Inlined or compiler generator code may have line number 0 which isn't |
| // useful to the user. Better to go up one line number. |
| if (line != 0) { |
| last_address = address; |
| last_file = file; |
| last_column = column; |
| last_line = line; |
| } |
| } |
| }; |
| |
// Input and result of a single line-number lookup. Every field starts at 0.
struct LineNumberInfo {
  // Address to look up.
  uint64_t pc{0};

  // Line and column recorded for `pc`. `line` stays 0 until a matching row
  // has been found.
  uint64_t line{0};
  uint64_t column{0};

  // Offsets into the object file at which the directory name and the file
  // name strings start. `dir_size` is the recorded size of the directory
  // string.
  uint64_t module_dir_offset{0};
  uint64_t dir_size{0};
  uint64_t module_filename_offset{0};
};
| |
| // Evaluates a Line Number Program as defined by the rules in section 6.2.5. |
| void EvaluateLineNumberProgram(const int fd, |
| LineNumberInfo* info, |
| uint64_t base_address, |
| uint64_t start, |
| const ProgramInfo& program_info) { |
| BufferedDwarfReader reader(fd, start); |
| uint64_t module_relative_pc = info->pc - base_address; |
| |
| // Helper that records the line-number table entry corresponding with the |
| // `module_relative_pc`. This is the thing that actually finds the line |
| // number for an address. |
| struct OnCommitImpl : public LineNumberRegisters::OnCommit { |
| private: |
| raw_ptr<LineNumberInfo> info; |
| uint64_t module_relative_pc; |
| const raw_ref<const ProgramInfo> program_info; |
| |
| public: |
| OnCommitImpl(LineNumberInfo* info, |
| uint64_t module_relative_pc, |
| const ProgramInfo& program_info) |
| : info(info), |
| module_relative_pc(module_relative_pc), |
| program_info(program_info) {} |
| |
| void Do(LineNumberRegisters* registers) override { |
| // When a line is committed, the program counter needs to check if it is |
| // in the [last_address, cur_addres) range. If yes, then the line pertains |
| // to the program counter. |
| if (registers->last_address == 0) { |
| // This is the first table entry so by definition, nothing is in its |
| // range. |
| return; |
| } |
| |
| // If module_relative_pc is out of range, skip. |
| if (module_relative_pc < registers->last_address || |
| module_relative_pc >= registers->address) |
| return; |
| |
| if (registers->last_file < program_info->num_filenames) { |
| info->line = registers->last_line; |
| info->column = registers->last_column; |
| |
| // Since DW_AT_name in the compile_unit is optional, it may be empty. If |
| // it is, guess that the file in entry 1 is the name. This does not |
| // follow spec, but seems to be common behavior. See the following LLVM |
| // bug for more info: https://reviews.llvm.org/D11003 |
| if (registers->last_file == 0 && |
| program_info->filename_offsets[0] == 0 && |
| 1 < program_info->num_filenames) { |
| program_info->filename_offsets[0] = program_info->filename_offsets[1]; |
| program_info->filename_dirs[0] = program_info->filename_dirs[1]; |
| } |
| |
| if (registers->last_file < kMaxFilenames) { |
| info->module_filename_offset = |
| program_info->filename_offsets[registers->last_file]; |
| |
| uint8_t dir = program_info->filename_dirs[registers->last_file]; |
| info->module_dir_offset = program_info->directory_offsets[dir]; |
| info->dir_size = program_info->directory_sizes[dir]; |
| } |
| } |
| } |
| } on_commit(info, module_relative_pc, program_info); |
| |
| LineNumberRegisters registers(program_info, &on_commit); |
| |
| // Special opcode range is [program_info.opcode_base, 255]. |
| // Lines can be max incremented by [line_base + line range - 1]. |
| // opcode = (desired line increment - line_base) + (line_range * operation |
| // advance) + opcode_base. |
| uint8_t opcode; |
| while (reader.position() < program_info.end_offset && info->line == 0) { |
| if (!reader.ReadInt8(opcode)) |
| return; |
| |
| // It's SPECIAL OPCODE TIME!. They're so special that they make up the |
| // vast majority of the opcodes and are the first thing described in the |
| // documentation. |
| // |
| // See DWARF-4 spec 6.2.5.1. |
| if (opcode >= program_info.opcode_base) { |
| uint8_t adjusted_opcode = opcode - program_info.opcode_base; |
| registers.OpAdvance(&program_info, |
| program_info.OpcodeToAdvance(adjusted_opcode)); |
| const int line_adjust = |
| program_info.line_base + (adjusted_opcode % program_info.line_range); |
| if (line_adjust < 0) { |
| if (static_cast<uint64_t>(-line_adjust) > registers.line) |
| return; |
| registers.line -= static_cast<uint64_t>(-line_adjust); |
| } else { |
| registers.line += static_cast<uint64_t>(line_adjust); |
| } |
| registers.basic_block = false; |
| registers.prologue_end = false; |
| registers.epilogue_begin = false; |
| registers.discriminator = 0; |
| registers.CommitLine(); |
| } else { |
| // Standard opcodes |
| switch (opcode) { |
| case 0: { |
| // Extended opcode. |
| uint64_t extended_opcode; |
| uint64_t extended_opcode_length; |
| if (!reader.ReadLeb128(extended_opcode_length)) |
| return; |
| uint64_t next_opcode = reader.position() + extended_opcode_length; |
| if (!reader.ReadLeb128(extended_opcode)) |
| return; |
| switch (extended_opcode) { |
| case 1: { |
| // DW_LNE_end_sequence |
| registers.end_sequence = true; |
| registers.CommitLine(); |
| registers = LineNumberRegisters(program_info, &on_commit); |
| break; |
| } |
| |
| case 2: { |
| // DW_LNE_set_address |
| uint32_t value; |
| if (!reader.ReadInt32(value)) |
| return; |
| registers.address = value; |
| registers.op_index = 0; |
| break; |
| } |
| |
| case 3: { |
| // DW_LNE_define_file |
| // |
| // This should only get used if the filename table itself is null. |
| // Record the module offset for the string and then drop the data. |
| uint64_t filename_offset = reader.position(); |
| reader.ReadCString(program_info.end_offset, nullptr, 0); |
| |
| // dir index |
| uint64_t value; |
| if (!reader.ReadLeb128(value)) |
| return; |
| size_t cur_filename = program_info.num_filenames; |
| if (cur_filename < kMaxFilenames && value < kMaxDirectories) { |
| ++program_info.num_filenames; |
| // Store the offset from the start of file and skip the data to |
| // save memory. |
| program_info.filename_offsets[cur_filename] = filename_offset; |
| program_info.filename_dirs[cur_filename] = |
| static_cast<uint8_t>(value); |
| } |
| |
| // modification time |
| if (!reader.ReadLeb128(value)) |
| return; |
| |
| // source file length |
| if (!reader.ReadLeb128(value)) |
| return; |
| break; |
| } |
| |
| case 4: { |
| // DW_LNE_set_discriminator |
| uint64_t value; |
| if (!reader.ReadLeb128(value)) |
| return; |
| registers.discriminator = value; |
| break; |
| } |
| |
| default: |
| abort(); |
| } |
| |
| // Skip any padding bytes in extended opcode. |
| reader.set_position(next_opcode); |
| break; |
| } |
| |
| case 1: { |
| // DW_LNS_copy. This commits the registers to the line number table. |
| registers.CommitLine(); |
| registers.discriminator = 0; |
| registers.basic_block = false; |
| registers.prologue_end = false; |
| registers.epilogue_begin = false; |
| break; |
| } |
| |
| case 2: { |
| // DW_LNS_advance_pc |
| uint64_t op_advance; |
| if (!reader.ReadLeb128(op_advance)) |
| return; |
| registers.OpAdvance(&program_info, op_advance); |
| break; |
| } |
| |
| case 3: { |
| // DW_LNS_advance_line |
| int64_t line_advance; |
| if (!reader.ReadLeb128(line_advance)) |
| return; |
| if (line_advance < 0) { |
| if (static_cast<uint64_t>(-line_advance) > registers.line) |
| return; |
| registers.line -= static_cast<uint64_t>(-line_advance); |
| } else { |
| registers.line += static_cast<uint64_t>(line_advance); |
| } |
| break; |
| } |
| |
| case 4: { |
| // DW_LNS_set_file |
| uint64_t value; |
| if (!reader.ReadLeb128(value)) |
| return; |
| registers.file = value; |
| break; |
| } |
| |
| case 5: { |
| // DW_LNS_set_column |
| uint64_t value; |
| if (!reader.ReadLeb128(value)) |
| return; |
| registers.column = value; |
| break; |
| } |
| |
| case 6: |
| // DW_LNS_negate_stmt |
| registers.is_stmt = !registers.is_stmt; |
| break; |
| |
| case 7: |
| // DW_LNS_set_basic_block |
| registers.basic_block = true; |
| break; |
| |
| case 8: |
| // DW_LNS_const_add_pc |
| registers.OpAdvance( |
| &program_info, |
| program_info.OpcodeToAdvance(255 - program_info.opcode_base)); |
| break; |
| |
| case 9: { |
| // DW_LNS_fixed_advance_pc |
| uint16_t value; |
| if (!reader.ReadInt16(value)) |
| return; |
| registers.address += value; |
| registers.op_index = 0; |
| break; |
| } |
| |
| case 10: |
| // DW_LNS_set_prologue_end |
| registers.prologue_end = true; |
| break; |
| |
| case 11: |
| // DW_LNS_set_epilogue_begin |
| registers.epilogue_begin = true; |
| break; |
| |
| case 12: { |
| // DW_LNS_set_isa |
| uint64_t value; |
| if (!reader.ReadLeb128(value)) |
| return; |
| registers.isa = value; |
| break; |
| } |
| |
| default: |
| abort(); |
| } |
| } |
| } |
| } |
| |
| // Parses a 32-bit DWARF-4 line number program header per section 6.2.4. |
| // `cu_name_offset` is the module offset for the 0th entry of the file table. |
| bool ParseDwarf4ProgramInfo(BufferedDwarfReader* reader, |
| bool is_64bit, |
| uint64_t cu_name_offset, |
| ProgramInfo* program_info) { |
| if (!reader->ReadOffset(is_64bit, program_info->header_length)) |
| return false; |
| program_info->start_offset = reader->position() + program_info->header_length; |
| |
| if (!reader->ReadInt8(program_info->minimum_instruction_length) || |
| !reader->ReadInt8(program_info->maximum_operations_per_instruction) || |
| !reader->ReadInt8(program_info->default_is_stmt) || |
| !reader->ReadInt8(program_info->line_base) || |
| !reader->ReadInt8(program_info->line_range) || |
| !reader->ReadInt8(program_info->opcode_base)) { |
| return false; |
| } |
| |
| for (int i = 0; i < (program_info->opcode_base - 1); i++) { |
| if (!reader->ReadInt8(program_info->standard_opcode_lengths[i])) |
| return false; |
| } |
| |
| // Table ends with a single null line. This basically means search for 2 |
| // contiguous empty bytes. |
| uint8_t last = 0, cur = 0; |
| for (;;) { |
| // Read a byte. |
| last = cur; |
| if (!reader->ReadInt8(cur)) |
| return false; |
| |
| if (last == 0 && cur == 0) { |
| // We're at the last entry where it's a double null. |
| break; |
| } |
| |
| // Read in all of the filename. |
| int cur_dir = program_info->num_directories; |
| if (cur_dir < kMaxDirectories) { |
| ++program_info->num_directories; |
| // "-1" is because we have already read the first byte above. |
| program_info->directory_offsets[cur_dir] = reader->position() - 1; |
| program_info->directory_sizes[cur_dir] = 1; |
| } |
| do { |
| if (!reader->ReadInt8(cur)) |
| return false; |
| if (cur_dir < kMaxDirectories) |
| ++program_info->directory_sizes[cur_dir]; |
| } while (cur != '\0'); |
| } |
| |
| // Read filename table line-by-line. |
| last = 0; |
| cur = 0; |
| for (;;) { |
| // Read a byte. |
| last = cur; |
| if (!reader->ReadInt8(cur)) |
| return false; |
| |
| if (last == 0 && cur == 0) { |
| // We're at the last entry where it's a double null. |
| break; |
| } |
| |
| // Read in all of the filename. "-1" is because we have already read the |
| // first byte of the filename above. |
| uint64_t filename_offset = reader->position() - 1; |
| do { |
| if (!reader->ReadInt8(cur)) |
| return false; |
| } while (cur != '\0'); |
| |
| uint64_t value; |
| |
| // Dir index |
| if (!reader->ReadLeb128(value)) |
| return false; |
| size_t cur_filename = program_info->num_filenames; |
| if (cur_filename < kMaxFilenames && value < kMaxDirectories) { |
| ++program_info->num_filenames; |
| program_info->filename_offsets[cur_filename] = filename_offset; |
| program_info->filename_dirs[cur_filename] = static_cast<uint8_t>(value); |
| } |
| |
| // Modification time |
| if (!reader->ReadLeb128(value)) |
| return false; |
| |
| // Bytes in file. |
| if (!reader->ReadLeb128(value)) |
| return false; |
| } |
| |
| // Set up the 0th filename. |
| program_info->filename_offsets[0] = cu_name_offset; |
| program_info->filename_dirs[0] = 0; |
| program_info->directory_offsets[0] = 0; |
| |
| return true; |
| } |
| |
| // Returns the offset of the next byte to read. |
| // `program_info.program_end` is guaranteed to be initlialized to either |
| // `kMaxOffset` if the program length could not be processed, or to |
| // the byte after the end of this program. |
| bool ReadProgramInfo(const int fd, |
| uint64_t start, |
| uint64_t cu_name_offset, |
| ProgramInfo* program_info) { |
| BufferedDwarfReader reader(fd, start); |
| program_info->end_offset = kMaxOffset; |
| |
| // Note that 64-bit dwarf does NOT imply a 64-bit binary and vice-versa. In |
| // fact many 64-bit binaries use 32-bit dwarf encoding. |
| bool is_64bit = false; |
| uint64_t data_length; |
| if (!reader.ReadInitialLength(is_64bit, data_length)) { |
| return false; |
| } |
| |
| // Set the program end. This allows the search to recover by skipping an |
| // unparsable program. |
| program_info->end_offset = reader.position() + data_length; |
| |
| uint16_t version; |
| if (!reader.ReadInt16(version)) { |
| return false; |
| } |
| |
| if (version == 4) { |
| return ParseDwarf4ProgramInfo(&reader, is_64bit, cu_name_offset, |
| program_info); |
| } |
| |
| // Currently does not support other DWARF versions. |
| return false; |
| } |
| |
| // Attempts to find line-number info for all of |info|. Returns the number of |
| // entries that do not have info yet. |
| uint64_t GetLineNumbersInProgram(const int fd, |
| LineNumberInfo* info, |
| uint64_t base_address, |
| uint64_t start, |
| uint64_t cu_name_offset) { |
| // Open the program. |
| ProgramInfo program_info; |
| if (ReadProgramInfo(fd, start, cu_name_offset, &program_info)) { |
| EvaluateLineNumberProgram(fd, info, base_address, program_info.start_offset, |
| program_info); |
| } |
| |
| return program_info.end_offset; |
| } |
| |
| // Scans the .debug_abbrev entry until it finds the Attribute List matching the |
| // `wanted_abbreviation_code`. This is called when parsing a DIE in .debug_info. |
| bool AdvancedReaderToAttributeList(BufferedDwarfReader& reader, |
| uint64_t table_end, |
| uint64_t wanted_abbreviation_code, |
| uint64_t& tag, |
| bool& has_children) { |
| // Abbreviation Table entries are: |
| // LEB128 - abbreviation code |
| // LEB128 - the entry's tag |
| // 1 byte - DW_CHILDREN_yes or DW_CHILDREN_no for if entry has children. |
| // [LEB128, LEB128] -- repeated set of attribute + form values in LEB128 |
| // [0, 0] -- null entry terminating list is 2 LEB128 0s. |
| while (reader.position() < table_end) { |
| uint64_t abbreviation_code; |
| if (!reader.ReadLeb128(abbreviation_code)) { |
| return false; |
| } |
| |
| if (!reader.ReadLeb128(tag)) { |
| return false; |
| } |
| |
| uint8_t raw_has_children; |
| if (!reader.ReadInt8(raw_has_children)) { |
| return false; |
| } |
| if (raw_has_children == 0) { |
| has_children = false; |
| } else if (raw_has_children == 1) { |
| has_children = true; |
| } else { |
| return false; |
| } |
| |
| if (abbreviation_code == wanted_abbreviation_code) { |
| return true; |
| } |
| |
| // Incorrect Abbreviation entry. Skip all of its attributes. |
| uint64_t attr; |
| uint64_t form; |
| do { |
| if (!reader.ReadLeb128(attr) || !reader.ReadLeb128(form)) { |
| return false; |
| } |
| } while (attr != 0 || form != 0); |
| } |
| |
| return false; |
| } |
| |
| // This reads through a .debug_info compile unit entry to try and extract |
| // the `cu_name_offset` as well as the `debug_line_offset` (offset into the |
| // .debug_lines table` corresponding to `pc`. |
| // |
| // The .debug_info sections are a packed set of bytes whose format is defined |
| // by a corresponding .debug_abbrev entry. Basically .debug_abbrev describes |
| // a struct and .debug_info has a header that tells which struct it is followed |
| // by a bunch of bytes. |
| // |
| // The control flow is to find the .debug_abbrev entry for each .debug_info |
| // entry, then walk through the .debug_abbrev entry to parse the bytes of the |
| // .debug_info entry. A successful parse calculates the address range that the |
| // .debug_info entry covers. When that is retrieved, `pc` can be compared to |
| // the range and a corresponding .debug_info can be found. |
| // |
| // The `debug_info_start` be the start of the whole .debug_info section or an |
| // offset into the section if it was known ahead of time (perhaps by consulting |
| // .debug_aranges). |
| // |
| // To fully interpret this data, the .debug_ranges and .debug_str sections |
| // also need to be interpreted. |
| bool GetCompileUnitName(int fd, |
| uint64_t debug_info_start, |
| uint64_t debug_info_end, |
| uint64_t pc, |
| uint64_t module_base_address, |
| uint64_t* debug_line_offset, |
| uint64_t* cu_name_offset) { |
| // Ensure defined `cu_name_offset` in case DW_AT_name is missing. |
| *cu_name_offset = 0; |
| |
| // Open .debug_info and .debug_abbrev as both are needed to find the |
| // DW_AT_name for the DW_TAG_compile_unit or DW_TAG_partial_unit |
| // corresponding to the given address. |
| |
| ElfW(Shdr) debug_abbrev; |
| constexpr static char kDebugAbbrevSectionName[] = ".debug_abbrev"; |
| if (!google::GetSectionHeaderByName(fd, kDebugAbbrevSectionName, |
| sizeof(kDebugAbbrevSectionName), |
| &debug_abbrev)) { |
| return false; |
| } |
| uint64_t debug_abbrev_end = debug_abbrev.sh_offset + debug_abbrev.sh_size; |
| |
| ElfW(Shdr) debug_str; |
| constexpr static char kDebugStrSectionName[] = ".debug_str"; |
| if (!google::GetSectionHeaderByName( |
| fd, kDebugStrSectionName, sizeof(kDebugStrSectionName), &debug_str)) { |
| return false; |
| } |
| uint64_t debug_str_end = debug_str.sh_offset + debug_str.sh_size; |
| |
| ElfW(Shdr) debug_ranges; |
| constexpr static char kDebugRangesSectionName[] = ".debug_ranges"; |
| if (!google::GetSectionHeaderByName(fd, kDebugRangesSectionName, |
| sizeof(kDebugRangesSectionName), |
| &debug_ranges)) { |
| return false; |
| } |
| uint64_t debug_ranges_end = debug_ranges.sh_offset + debug_ranges.sh_size; |
| |
| // Iterate Compile Units. |
| uint64_t next_compilation_unit = kMaxOffset; |
| for (BufferedDwarfReader reader(fd, debug_info_start); |
| reader.position() < debug_info_end; |
| reader.set_position(next_compilation_unit)) { |
| bool is_64bit; |
| uint64_t length; |
| uint16_t dwarf_version; |
| uint64_t abbrev_offset; |
| uint8_t address_size; |
| if (!reader.ReadCommonHeader(is_64bit, length, dwarf_version, abbrev_offset, |
| address_size, next_compilation_unit)) { |
| return false; |
| } |
| |
| // Compilation Unit Header parsed. Now read the first tag which is either a |
| // DW_TAG_compile_unit or DW_TAG_partial_unit. The entry type is designated |
| // by a LEB128 number that needs to be cross-referenced in the abbreviations |
| // table to understand the format of the rest of the entry. |
| uint64_t abbreviation_code; |
| if (!reader.ReadLeb128(abbreviation_code)) { |
| return false; |
| } |
| |
| // Find entry in the abbreviation table. |
| BufferedDwarfReader abbrev_reader(fd, |
| debug_abbrev.sh_offset + abbrev_offset); |
| uint64_t tag; |
| bool has_children; |
| AdvancedReaderToAttributeList(abbrev_reader, debug_abbrev_end, |
| abbreviation_code, tag, has_children); |
| |
| // Ignore if it has children. |
| static constexpr int kDW_TAG_compile_unit = 0x11; |
| static constexpr int kDW_TAG_partial_unit = 0x3c; |
| if (tag != kDW_TAG_compile_unit && tag != kDW_TAG_partial_unit) { |
| return false; |
| } |
| |
| // Use table to parse the name, high, and low attributes. |
| static constexpr int kDW_AT_name = 0x3; // string |
| static constexpr int kDW_AT_stmt_list = 0x10; // lineptr |
| static constexpr int kDW_AT_low_pc = 0x11; // address |
| static constexpr int kDW_AT_high_pc = 0x12; // address, constant |
| static constexpr int kDW_AT_ranges = 0x55; // rangelistptr |
| uint64_t attr; |
| uint64_t form; |
| uint64_t low_pc = 0; |
| uint64_t high_pc = 0; |
| bool high_pc_is_offset = false; |
| bool is_found_in_range = false; |
| do { |
| if (!abbrev_reader.ReadLeb128(attr)) { |
| return false; |
| } |
| if (!abbrev_reader.ReadLeb128(form)) { |
| return false; |
| } |
| // Table from 7.5.4, Figure 20. |
| enum Form { |
| kDW_FORM_addr = 0x01, |
| kDW_FORM_block2 = 0x03, |
| kDW_FORM_block4 = 0x04, |
| kDW_FORM_data2 = 0x05, |
| kDW_FORM_data4 = 0x06, |
| kDW_FORM_data8 = 0x07, |
| kDW_FORM_string = 0x08, |
| kDW_FORM_block = 0x09, |
| kDW_FORM_block1 = 0x0a, |
| kDW_FORM_data1 = 0x0b, |
| kDW_FORM_flag = 0x0c, |
| kDW_FORM_sdata = 0x0d, |
| kDW_FORM_strp = 0x0e, |
| kDW_FORM_udata = 0x0f, |
| kDW_FORM_ref_addr = 0x10, |
| kDW_FORM_ref1 = 0x11, |
| kDW_FORM_ref2 = 0x12, |
| kDW_FORM_ref4 = 0x13, |
| kDW_FORM_ref8 = 0x14, |
| kDW_FORM_ref_udata = 0x15, |
| kDW_FORM_ref_indrect = 0x16, |
| kDW_FORM_sec_offset = 0x17, |
| kDW_FORM_exprloc = 0x18, |
| kDW_FORM_flag_present = 0x19, |
| kDW_FORM_ref_sig8 = 0x20, |
| }; |
| |
| switch (form) { |
| case kDW_FORM_string: { |
| // Read the value into if necessary `out` |
| if (attr == kDW_AT_name) { |
| *cu_name_offset = reader.position(); |
| } |
| if (!reader.ReadCString(debug_info_end, nullptr, 0)) { |
| return false; |
| } |
| } break; |
| |
| case kDW_FORM_strp: { |
| uint64_t strp_offset; |
| if (!reader.ReadOffset(is_64bit, strp_offset)) { |
| return false; |
| } |
| |
| if (attr == kDW_AT_name) { |
| uint64_t pos = debug_str.sh_offset + strp_offset; |
| if (pos >= debug_str_end) { |
| return false; |
| } |
| *cu_name_offset = pos; |
| } |
| } break; |
| |
| case kDW_FORM_addr: { |
| uint64_t address; |
| if (!reader.ReadAddress(address_size, address)) { |
| return false; |
| } |
| |
| if (attr == kDW_AT_low_pc) { |
| low_pc = address; |
| } else if (attr == kDW_AT_high_pc) { |
| high_pc_is_offset = false; |
| high_pc = address; |
| } |
| } break; |
| |
| case kDW_FORM_data1: { |
| uint8_t data; |
| if (!reader.ReadInt8(data)) { |
| return false; |
| } |
| if (attr == kDW_AT_high_pc) { |
| high_pc_is_offset = true; |
| high_pc = data; |
| } |
| } break; |
| |
| case kDW_FORM_data2: { |
| uint16_t data; |
| if (!reader.ReadInt16(data)) { |
| return false; |
| } |
| if (attr == kDW_AT_high_pc) { |
| high_pc_is_offset = true; |
| high_pc = data; |
| } |
| } break; |
| |
| case kDW_FORM_data4: { |
| uint32_t data; |
| if (!reader.ReadInt32(data)) { |
| return false; |
| } |
| if (attr == kDW_AT_high_pc) { |
| high_pc_is_offset = true; |
| high_pc = data; |
| } |
| } break; |
| |
| case kDW_FORM_data8: { |
| uint64_t data; |
| if (!reader.ReadInt64(data)) { |
| return false; |
| } |
| if (attr == kDW_AT_high_pc) { |
| high_pc_is_offset = true; |
| high_pc = data; |
| } |
| } break; |
| |
| case kDW_FORM_sdata: { |
| int64_t data; |
| if (!reader.ReadLeb128(data)) { |
| return false; |
| } |
| if (attr == kDW_AT_high_pc) { |
| high_pc_is_offset = true; |
| high_pc = static_cast<uint64_t>(data); |
| } |
| } break; |
| |
| case kDW_FORM_udata: { |
| uint64_t data; |
| if (!reader.ReadLeb128(data)) { |
| return false; |
| } |
| if (attr == kDW_AT_high_pc) { |
| high_pc_is_offset = true; |
| high_pc = data; |
| } |
| } break; |
| |
| case kDW_FORM_ref_addr: |
| case kDW_FORM_sec_offset: { |
| uint64_t value; |
| if (!reader.ReadOffset(is_64bit, value)) { |
| return false; |
| } |
| |
| if (attr == kDW_AT_ranges) { |
| uint64_t current_base_address = module_base_address; |
| BufferedDwarfReader ranges_reader(fd, |
| debug_ranges.sh_offset + value); |
| |
| while (ranges_reader.position() < debug_ranges_end) { |
| // Ranges are 2 addresses in size. |
| uint64_t range_start; |
| uint64_t range_end; |
| if (!ranges_reader.ReadAddress(address_size, range_start)) { |
| return false; |
| } |
| if (!ranges_reader.ReadAddress(address_size, range_end)) { |
| return false; |
| } |
| uint64_t relative_pc = pc - current_base_address; |
| |
| if (range_start == 0 && range_end == 0) { |
| if (!is_found_in_range) { |
| // Time to go to the next iteration. |
| goto next_cu; |
| } |
| break; |
| } else if (((address_size == 4) && |
| (range_start == 0xffffffffUL)) || |
| ((address_size == 8) && |
| (range_start == 0xffffffffffffffffULL))) { |
| // Check if this is a new base add value. 2.17.3 |
| current_base_address = range_end; |
| } else { |
| if (relative_pc >= range_start && relative_pc < range_end) { |
| is_found_in_range = true; |
| break; |
| } |
| } |
| } |
| } else if (attr == kDW_AT_stmt_list) { |
| *debug_line_offset = value; |
| } |
| } break; |
| |
| case kDW_FORM_flag: |
| case kDW_FORM_ref1: |
| case kDW_FORM_block1: { |
| uint8_t dummy; |
| if (!reader.ReadInt8(dummy)) { |
| return false; |
| } |
| } break; |
| |
| case kDW_FORM_ref2: |
| case kDW_FORM_block2: { |
| uint16_t dummy; |
| if (!reader.ReadInt16(dummy)) { |
| return false; |
| } |
| } break; |
| |
| case kDW_FORM_ref4: |
| case kDW_FORM_block4: { |
| uint32_t dummy; |
| if (!reader.ReadInt32(dummy)) { |
| return false; |
| } |
| } break; |
| |
| case kDW_FORM_ref8: { |
| uint64_t dummy; |
| if (!reader.ReadInt64(dummy)) { |
| return false; |
| } |
| } break; |
| |
| case kDW_FORM_ref_udata: |
| case kDW_FORM_block: { |
| uint64_t dummy; |
| if (!reader.ReadLeb128(dummy)) { |
| return false; |
| } |
| } break; |
| |
| case kDW_FORM_exprloc: { |
| uint64_t value; |
| if (!reader.ReadLeb128(value)) { |
| return false; |
| } |
| reader.set_position(reader.position() + value); |
| } break; |
| } |
| } while (attr != 0 || form != 0); |
| |
| // Because attributes can be in any order, most of the computations (minus |
| // checking range list entries) cannot happen until everything is parsed for |
| // the one .debug_info entry. Do the analysis here. |
| if (is_found_in_range) { |
| // Well formed compile_unit and partial_unit tags either have a |
| // DT_AT_ranges entry or an DT_AT_low_pc entiry. If is_found_in_range |
| // matched as true, then this entry matches the given pc. |
| return true; |
| } |
| |
| // If high_pc_is_offset is 0, it was never found in the DIE. This indicates |
| // a single address entry. Only look at the low_pc. |
| { |
| uint64_t module_relative_pc = pc - module_base_address; |
| if (high_pc == 0 && module_relative_pc != low_pc) { |
| goto next_cu; |
| } |
| |
| // Otherwise this is a contiguous range DIE. Normalize the meaning of the |
| // high_pc field and check if it contains the pc. |
| if (high_pc_is_offset) { |
| high_pc = low_pc + high_pc; |
| high_pc_is_offset = false; |
| } |
| |
| if (module_relative_pc >= low_pc && module_relative_pc < high_pc) { |
| return true; |
| } |
| } |
| |
| // Not found. |
| next_cu:; |
| } |
| return false; |
| } |
| |
| // Thin wrapper over `GetCompileUnitName` that opens the .debug_info section. |
| bool ReadCompileUnit(int fd, |
| uint64_t pc, |
| uint64_t cu_offset, |
| uint64_t base_address, |
| uint64_t* debug_line_offset, |
| uint64_t* cu_name_offset) { |
| if (cu_offset == 0) { |
| return false; |
| } |
| |
| ElfW(Shdr) debug_info; |
| constexpr static char kDebugInfoSectionName[] = ".debug_info"; |
| if (!google::GetSectionHeaderByName(fd, kDebugInfoSectionName, |
| sizeof(kDebugInfoSectionName), |
| &debug_info)) { |
| return false; |
| } |
| uint64_t debug_info_end = debug_info.sh_offset + debug_info.sh_size; |
| |
| return GetCompileUnitName(fd, debug_info.sh_offset + cu_offset, |
| debug_info_end, pc, base_address, debug_line_offset, |
| cu_name_offset); |
| } |
| |
| // Takes the information from `info` and renders the data located in the |
| // object file `fd` into `out`. The format looks like: |
| // |
| // [../path/to/foo.cc:10:40] |
| // |
| // which would indicate line 10 column 40 in ../path/to/foo.cc |
| void SerializeLineNumberInfoToString(int fd, |
| const LineNumberInfo& info, |
| char* out, |
| size_t out_size) { |
| size_t out_pos = 0; |
| if (info.module_filename_offset) { |
| BufferedDwarfReader reader(fd, info.module_dir_offset); |
| if (info.module_dir_offset != 0) { |
| out_pos += |
| reader.ReadCString(kMaxOffset, out + out_pos, out_size - out_pos); |
| out[out_pos - 1] = '/'; |
| } |
| |
| reader.set_position(info.module_filename_offset); |
| out_pos += |
| reader.ReadCString(kMaxOffset, out + out_pos, out_size - out_pos); |
| } else { |
| out[out_pos++] = '\0'; |
| } |
| |
| out[out_pos - 1] = ':'; |
| char* tmp = internal::itoa_r(static_cast<intptr_t>(info.line), out + out_pos, |
| out_size - out_pos, 10, 0); |
| out_pos += strlen(tmp) + 1; |
| out[out_pos - 1] = ':'; |
| tmp = internal::itoa_r(static_cast<intptr_t>(info.column), out + out_pos, |
| out_size - out_pos, 10, 0); |
| out_pos += strlen(tmp) + 1; |
| } |
| |
| // Reads the Line Number info for a compile unit. |
| bool GetLineNumberInfoFromObject(int fd, |
| uint64_t pc, |
| uint64_t cu_offset, |
| uint64_t base_address, |
| char* out, |
| size_t out_size) { |
| uint64_t cu_name_offset; |
| uint64_t debug_line_offset; |
| if (!ReadCompileUnit(fd, pc, cu_offset, base_address, &debug_line_offset, |
| &cu_name_offset)) { |
| return false; |
| } |
| |
| ElfW(Shdr) debug_line; |
| constexpr static char kDebugLineSectionName[] = ".debug_line"; |
| if (!google::GetSectionHeaderByName(fd, kDebugLineSectionName, |
| sizeof(kDebugLineSectionName), |
| &debug_line)) { |
| return false; |
| } |
| |
| LineNumberInfo info; |
| info.pc = pc; |
| uint64_t line_info_program_offset = debug_line.sh_offset + debug_line_offset; |
| GetLineNumbersInProgram(fd, &info, base_address, line_info_program_offset, |
| cu_name_offset); |
| |
| if (info.line == 0) { |
| // No matching line number or filename found. |
| return false; |
| } |
| |
| SerializeLineNumberInfoToString(fd, info, out, out_size); |
| |
| return true; |
| } |
| |
| struct FrameInfo { |
| raw_ptr<uint64_t> cu_offset; |
| uintptr_t pc; |
| }; |
| |
| // Returns the number of frames still missing info. |
| // |
| // The aranges table is a mapping of ranges to compilation units. Given an array |
| // of `frame_info`, this finds the compile units for each of the frames doing |
| // only one pass over the table. It does not preserve the order of `frame_info`. |
| // |
| // The main benefit of this function is preserving the single pass through the |
| // table which is important for performance. |
| size_t ProcessFlatArangeSet(BufferedDwarfReader* reader, |
| uint64_t next_set, |
| uint8_t address_size, |
| uint64_t base_address, |
| uint64_t cu_offset, |
| FrameInfo* frame_info, |
| size_t num_frames) { |
| size_t unsorted_start = 0; |
| while (unsorted_start < num_frames && reader->position() < next_set) { |
| uint64_t start; |
| uint64_t length; |
| if (!reader->ReadAddress(address_size, start)) { |
| break; |
| } |
| if (!reader->ReadAddress(address_size, length)) { |
| break; |
| } |
| uint64_t end = start + length; |
| for (size_t i = unsorted_start; i < num_frames; ++i) { |
| uint64_t module_relative_pc = frame_info[i].pc - base_address; |
| if (start <= module_relative_pc && module_relative_pc < end) { |
| *frame_info[i].cu_offset = cu_offset; |
| if (i != unsorted_start) { |
| // Move to sorted section. |
| std::swap(frame_info[i], frame_info[unsorted_start]); |
| } |
| unsorted_start++; |
| } |
| } |
| } |
| |
| return unsorted_start; |
| } |
| |
| // This is a pre-step that uses the .debug_aranges table to find all the compile |
| // units for a given set of frames. This allows code to avoid iterating over |
| // all compile units at a later step in the symbolization process. |
| void PopulateCompileUnitOffsets(int fd, |
| FrameInfo* frame_info, |
| size_t num_frames, |
| uint64_t base_address) { |
| ElfW(Shdr) debug_aranges; |
| constexpr static char kDebugArangesSectionName[] = ".debug_aranges"; |
| if (!google::GetSectionHeaderByName(fd, kDebugArangesSectionName, |
| sizeof(kDebugArangesSectionName), |
| &debug_aranges)) { |
| return; |
| } |
| uint64_t debug_aranges_end = debug_aranges.sh_offset + debug_aranges.sh_size; |
| uint64_t next_arange_set = kMaxOffset; |
| size_t unsorted_start = 0; |
| for (BufferedDwarfReader reader(fd, debug_aranges.sh_offset); |
| unsorted_start < num_frames && reader.position() < debug_aranges_end; |
| reader.set_position(next_arange_set)) { |
| bool is_64bit; |
| uint64_t length; |
| uint16_t arange_version; |
| uint64_t debug_info_offset; |
| uint8_t address_size; |
| if (!reader.ReadCommonHeader(is_64bit, length, arange_version, |
| debug_info_offset, address_size, |
| next_arange_set)) { |
| return; |
| } |
| |
| uint8_t segment_size; |
| if (!reader.ReadInt8(segment_size)) { |
| return; |
| } |
| |
| if (segment_size != 0) { |
| // Only flat namespaces are supported. |
| return; |
| } |
| |
| // The tuple list is aligned, to a multiple of the tuple-size after the |
| // section sstart. Because this code only supports flat address spaces, this |
| // means 2*address_size. |
| while (((reader.position() - debug_aranges.sh_offset) % |
| (2 * address_size)) != 0) { |
| uint8_t dummy; |
| if (!reader.ReadInt8(dummy)) { |
| return; |
| } |
| } |
| unsorted_start += ProcessFlatArangeSet( |
| &reader, next_arange_set, address_size, base_address, debug_info_offset, |
| &frame_info[unsorted_start], num_frames - unsorted_start); |
| } |
| } |
| |
| } // namespace |
| |
| bool GetDwarfSourceLineNumber(void* pc, |
| uintptr_t cu_offset, |
| char* out, |
| size_t out_size) { |
| uint64_t pc0 = reinterpret_cast<uint64_t>(pc); |
| uint64_t object_start_address = 0; |
| uint64_t object_base_address = 0; |
| |
| google::FileDescriptor object_fd(google::FileDescriptor( |
| google::OpenObjectFileContainingPcAndGetStartAddress( |
| pc0, object_start_address, object_base_address, nullptr, 0))); |
| |
| if (!object_fd.get()) { |
| return false; |
| } |
| |
| if (!GetLineNumberInfoFromObject(object_fd.get(), pc0, cu_offset, |
| object_base_address, out, out_size)) { |
| return false; |
| } |
| |
| return true; |
| } |
| |
| void GetDwarfCompileUnitOffsets(void* const* trace, |
| uint64_t* cu_offsets, |
| size_t num_frames) { |
| // Ensure `cu_offsets` always has a known state. |
| memset(cu_offsets, 0, sizeof(uint64_t) * num_frames); |
| |
| FrameInfo* frame_info = |
| static_cast<FrameInfo*>(alloca(sizeof(FrameInfo) * num_frames)); |
| for (size_t i = 0; i < num_frames; i++) { |
| // The `cu_offset` also encodes the original sort order. |
| frame_info[i].cu_offset = &cu_offsets[i]; |
| frame_info[i].pc = reinterpret_cast<uintptr_t>(trace[i]); |
| } |
| auto pc_comparator = [](const FrameInfo& lhs, const FrameInfo& rhs) { |
| return lhs.pc < rhs.pc; |
| }; |
| |
| // Use heapsort to avoid recursion in a signal handler. |
| std::make_heap(&frame_info[0], &frame_info[num_frames - 1], pc_comparator); |
| std::sort_heap(&frame_info[0], &frame_info[num_frames - 1], pc_comparator); |
| |
| // Walk the frame_info one compilation unit at a time. |
| for (size_t cur_frame = 0; cur_frame < num_frames; ++cur_frame) { |
| uint64_t object_start_address = 0; |
| uint64_t object_base_address = 0; |
| google::FileDescriptor object_fd(google::FileDescriptor( |
| google::OpenObjectFileContainingPcAndGetStartAddress( |
| frame_info[cur_frame].pc, object_start_address, object_base_address, |
| nullptr, 0))); |
| |
| // TODO(https://crbug.com/1335630): Consider exposing the end address so a |
| // range of frames can be bulk-populated. This was originally implemented, |
| // but line number symbolization is currently broken by default (and also |
| // broken in sandboxed processes). The various issues will be addressed |
| // incrementally in follow-up patches, and the optimization here restored if |
| // needed. |
| |
| PopulateCompileUnitOffsets(object_fd.get(), &frame_info[cur_frame], 1, |
| object_base_address); |
| } |
| } |
| |
| } // namespace debug |
| } // namespace base |
| |
| #else // USE_SYMBOLIZE |
| |
| #include <cstring> |
| |
| namespace base { |
| namespace debug { |
| |
// Stub used when USE_SYMBOLIZE is not defined: no line information is ever
// available, so `out` is left untouched and the lookup always fails.
bool GetDwarfSourceLineNumber(void* pc,
                              uintptr_t cu_offset,
                              char* out,
                              size_t out_size) {
  // All arguments are intentionally ignored.
  return false;
}
| |
// Stub used when USE_SYMBOLIZE is not defined: `trace` is ignored and the
// first `num_frames` entries of `cu_offsets` are set to 0.
void GetDwarfCompileUnitOffsets(void* const* trace,
                                uint64_t* cu_offsets,
                                size_t num_frames) {
  // Provide defined values even in the stub. The size must be that of an
  // element, not of the pointer, or only part of the array is cleared on
  // targets whose pointers are narrower than 64 bits.
  memset(cu_offsets, 0, sizeof(*cu_offsets) * num_frames);
}
| |
| } // namespace debug |
| } // namespace base |
| |
| #endif |