| #!/usr/bin/env python |
| # |
| # Copyright 2012 the V8 project authors. All rights reserved. |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions are |
| # met: |
| # |
| # * Redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer. |
| # * Redistributions in binary form must reproduce the above |
| # copyright notice, this list of conditions and the following |
| # disclaimer in the documentation and/or other materials provided |
| # with the distribution. |
| # * Neither the name of Google Inc. nor the names of its |
| # contributors may be used to endorse or promote products derived |
| # from this software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| |
| # for py2/py3 compatibility |
| from __future__ import print_function |
| |
| import bisect |
| import collections |
| import ctypes |
| import disasm |
| import mmap |
| import optparse |
| import os |
| import re |
| import subprocess |
| import sys |
| import time |
| |
| |
| USAGE="""usage: %prog [OPTION]... |
| |
| Analyses V8 and perf logs to produce profiles. |
| |
| Perf logs can be collected using a command like: |
| $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof |
| # -R: collect all data |
| # -e cycles: use cpu-cycles event (run "perf list" for details) |
| # -c 10000: write a sample after each 10000 events |
| # -f: force output file overwrite |
| # -i: limit profiling to our process and the kernel |
| # --ll-prof shell flag enables the right V8 logs |
| This will produce a binary trace file (perf.data) that %prog can analyse. |
| |
| IMPORTANT: |
| The kernel has an internal maximum for events per second, it is 100K by |
| default. That's not enough for "-c 10000". Set it to some higher value: |
| $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate |
| You can also make the warning about kernel address maps go away: |
| $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict |
| |
| We have a convenience script that handles all of the above for you: |
| $ tools/run-llprof.sh ./d8 bench.js |
| |
| Examples: |
| # Print flat profile with annotated disassembly for the 10 top |
| # symbols. Use default log names. |
| $ %prog --disasm-top=10 |
| |
| # Print flat profile with annotated disassembly for all used symbols. |
| # Use default log names and include kernel symbols into analysis. |
| $ %prog --disasm-all --kernel |
| |
| # Print flat profile. Use custom log names. |
| $ %prog --log=foo.log --trace=foo.data |
| """ |
| |
| |
| JS_ORIGIN = "js" |
| |
| |
| class Code(object): |
| """Code object.""" |
| |
| _id = 0 |
| UNKNOWN = 0 |
| V8INTERNAL = 1 |
| FULL_CODEGEN = 2 |
| OPTIMIZED = 3 |
| |
| def __init__(self, name, start_address, end_address, origin, origin_offset): |
| self.id = Code._id |
| Code._id += 1 |
| self.name = name |
| self.other_names = None |
| self.start_address = start_address |
| self.end_address = end_address |
| self.origin = origin |
| self.origin_offset = origin_offset |
| self.self_ticks = 0 |
| self.self_ticks_map = None |
| self.callee_ticks = None |
| if name.startswith("LazyCompile:*"): |
| self.codetype = Code.OPTIMIZED |
| elif name.startswith("LazyCompile:"): |
| self.codetype = Code.FULL_CODEGEN |
| elif name.startswith("v8::internal::"): |
| self.codetype = Code.V8INTERNAL |
| else: |
| self.codetype = Code.UNKNOWN |
| |
| def AddName(self, name): |
| assert self.name != name |
| if self.other_names is None: |
| self.other_names = [name] |
| return |
| if not name in self.other_names: |
| self.other_names.append(name) |
| |
| def FullName(self): |
| if self.other_names is None: |
| return self.name |
| self.other_names.sort() |
| return "%s (aka %s)" % (self.name, ", ".join(self.other_names)) |
| |
| def IsUsed(self): |
| return self.self_ticks > 0 or self.callee_ticks is not None |
| |
| def Tick(self, pc): |
| self.self_ticks += 1 |
| if self.self_ticks_map is None: |
| self.self_ticks_map = collections.defaultdict(lambda: 0) |
| offset = pc - self.start_address |
| self.self_ticks_map[offset] += 1 |
| |
| def CalleeTick(self, callee): |
| if self.callee_ticks is None: |
| self.callee_ticks = collections.defaultdict(lambda: 0) |
| self.callee_ticks[callee] += 1 |
| |
| def PrintAnnotated(self, arch, options): |
| if self.self_ticks_map is None: |
| ticks_map = [] |
| else: |
| ticks_map = self.self_ticks_map.items() |
| # Convert the ticks map to offsets and counts arrays so that later |
| # we can do binary search in the offsets array. |
| ticks_map.sort(key=lambda t: t[0]) |
| ticks_offsets = [t[0] for t in ticks_map] |
| ticks_counts = [t[1] for t in ticks_map] |
| # Get a list of disassembled lines and their addresses. |
| lines = self._GetDisasmLines(arch, options) |
| if len(lines) == 0: |
| return |
| # Print annotated lines. |
| address = lines[0][0] |
| total_count = 0 |
| for i in range(len(lines)): |
| start_offset = lines[i][0] - address |
| if i == len(lines) - 1: |
| end_offset = self.end_address - self.start_address |
| else: |
| end_offset = lines[i + 1][0] - address |
| # Ticks (reported pc values) are not always precise, i.e. not |
| # necessarily point at instruction starts. So we have to search |
| # for ticks that touch the current instruction line. |
| j = bisect.bisect_left(ticks_offsets, end_offset) |
| count = 0 |
| for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])): |
| if offset < start_offset: |
| break |
| count += cnt |
| total_count += count |
| percent = 100.0 * count / self.self_ticks |
| offset = lines[i][0] |
| if percent >= 0.01: |
| # 5 spaces for tick count |
| # 1 space following |
| # 1 for '|' |
| # 1 space following |
| # 6 for the percentage number, incl. the '.' |
| # 1 for the '%' sign |
| # => 15 |
| print("%5d | %6.2f%% %x(%d): %s" % (count, percent, offset, offset, lines[i][1])) |
| else: |
| print("%s %x(%d): %s" % (" " * 15, offset, offset, lines[i][1])) |
| print() |
| assert total_count == self.self_ticks, \ |
| "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self) |
| |
| def __str__(self): |
| return "%s [0x%x, 0x%x) size: %d origin: %s" % ( |
| self.name, |
| self.start_address, |
| self.end_address, |
| self.end_address - self.start_address, |
| self.origin) |
| |
| def _GetDisasmLines(self, arch, options): |
| if self.origin == JS_ORIGIN: |
| inplace = False |
| filename = options.log + ".ll" |
| else: |
| inplace = True |
| filename = self.origin |
| return disasm.GetDisasmLines(filename, |
| self.origin_offset, |
| self.end_address - self.start_address, |
| arch, |
| inplace) |
| |
| |
| class CodePage(object): |
| """Group of adjacent code objects.""" |
| |
| SHIFT = 20 # 1M pages |
| SIZE = (1 << SHIFT) |
| MASK = ~(SIZE - 1) |
| |
| @staticmethod |
| def PageAddress(address): |
| return address & CodePage.MASK |
| |
| @staticmethod |
| def PageId(address): |
| return address >> CodePage.SHIFT |
| |
| @staticmethod |
| def PageAddressFromId(id): |
| return id << CodePage.SHIFT |
| |
| def __init__(self, address): |
| self.address = address |
| self.code_objects = [] |
| |
| def Add(self, code): |
| self.code_objects.append(code) |
| |
| def Remove(self, code): |
| self.code_objects.remove(code) |
| |
| def Find(self, pc): |
| code_objects = self.code_objects |
| for i, code in enumerate(code_objects): |
| if code.start_address <= pc < code.end_address: |
| code_objects[0], code_objects[i] = code, code_objects[0] |
| return code |
| return None |
| |
| def __iter__(self): |
| return self.code_objects.__iter__() |
| |
| |
| class CodeMap(object): |
| """Code object map.""" |
| |
| def __init__(self): |
| self.pages = {} |
| self.min_address = 1 << 64 |
| self.max_address = -1 |
| |
| def Add(self, code, max_pages=-1): |
| page_id = CodePage.PageId(code.start_address) |
| limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1) |
| pages = 0 |
| while page_id < limit_id: |
| if max_pages >= 0 and pages > max_pages: |
| print("Warning: page limit (%d) reached for %s [%s]" % ( |
| max_pages, code.name, code.origin), file=sys.stderr) |
| break |
| if page_id in self.pages: |
| page = self.pages[page_id] |
| else: |
| page = CodePage(CodePage.PageAddressFromId(page_id)) |
| self.pages[page_id] = page |
| page.Add(code) |
| page_id += 1 |
| pages += 1 |
| self.min_address = min(self.min_address, code.start_address) |
| self.max_address = max(self.max_address, code.end_address) |
| |
| def Remove(self, code): |
| page_id = CodePage.PageId(code.start_address) |
| limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1) |
| removed = False |
| while page_id < limit_id: |
| if page_id not in self.pages: |
| page_id += 1 |
| continue |
| page = self.pages[page_id] |
| page.Remove(code) |
| removed = True |
| page_id += 1 |
| return removed |
| |
| def AllCode(self): |
| for page in self.pages.itervalues(): |
| for code in page: |
| if CodePage.PageAddress(code.start_address) == page.address: |
| yield code |
| |
| def UsedCode(self): |
| for code in self.AllCode(): |
| if code.IsUsed(): |
| yield code |
| |
| def Print(self): |
| for code in self.AllCode(): |
| print(code) |
| |
| def Find(self, pc): |
| if pc < self.min_address or pc >= self.max_address: |
| return None |
| page_id = CodePage.PageId(pc) |
| if page_id not in self.pages: |
| return None |
| return self.pages[page_id].Find(pc) |
| |
| |
| class CodeInfo(object): |
| """Generic info about generated code objects.""" |
| |
| def __init__(self, arch, header_size): |
| self.arch = arch |
| self.header_size = header_size |
| |
| |
| class LogReader(object): |
| """V8 low-level (binary) log reader.""" |
| |
| _ARCH_TO_POINTER_TYPE_MAP = { |
| "ia32": ctypes.c_uint32, |
| "arm": ctypes.c_uint32, |
| "mips": ctypes.c_uint32, |
| "x64": ctypes.c_uint64, |
| "arm64": ctypes.c_uint64 |
| } |
| |
| _CODE_CREATE_TAG = "C" |
| _CODE_MOVE_TAG = "M" |
| _CODE_MOVING_GC_TAG = "G" |
| |
| def __init__(self, log_name, code_map): |
| self.log_file = open(log_name, "r") |
| self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE) |
| self.log_pos = 0 |
| self.code_map = code_map |
| |
| self.arch = self.log[:self.log.find("\0")] |
| self.log_pos += len(self.arch) + 1 |
| assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \ |
| "Unsupported architecture %s" % self.arch |
| pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch] |
| |
| self.code_create_struct = LogReader._DefineStruct([ |
| ("name_size", ctypes.c_int32), |
| ("code_address", pointer_type), |
| ("code_size", ctypes.c_int32)]) |
| |
| self.code_move_struct = LogReader._DefineStruct([ |
| ("from_address", pointer_type), |
| ("to_address", pointer_type)]) |
| |
| self.code_delete_struct = LogReader._DefineStruct([ |
| ("address", pointer_type)]) |
| |
| def ReadUpToGC(self): |
| while self.log_pos < self.log.size(): |
| tag = self.log[self.log_pos] |
| self.log_pos += 1 |
| |
| if tag == LogReader._CODE_MOVING_GC_TAG: |
| return |
| |
| if tag == LogReader._CODE_CREATE_TAG: |
| event = self.code_create_struct.from_buffer(self.log, self.log_pos) |
| self.log_pos += ctypes.sizeof(event) |
| start_address = event.code_address |
| end_address = start_address + event.code_size |
| name = self.log[self.log_pos:self.log_pos + event.name_size] |
| origin = JS_ORIGIN |
| self.log_pos += event.name_size |
| origin_offset = self.log_pos |
| self.log_pos += event.code_size |
| code = Code(name, start_address, end_address, origin, origin_offset) |
| conficting_code = self.code_map.Find(start_address) |
| if conficting_code: |
| if not (conficting_code.start_address == code.start_address and |
| conficting_code.end_address == code.end_address): |
| self.code_map.Remove(conficting_code) |
| else: |
| LogReader._HandleCodeConflict(conficting_code, code) |
| # TODO(vitalyr): this warning is too noisy because of our |
| # attempts to reconstruct code log from the snapshot. |
| # print >>sys.stderr, \ |
| # "Warning: Skipping duplicate code log entry %s" % code |
| continue |
| self.code_map.Add(code) |
| continue |
| |
| if tag == LogReader._CODE_MOVE_TAG: |
| event = self.code_move_struct.from_buffer(self.log, self.log_pos) |
| self.log_pos += ctypes.sizeof(event) |
| old_start_address = event.from_address |
| new_start_address = event.to_address |
| if old_start_address == new_start_address: |
| # Skip useless code move entries. |
| continue |
| code = self.code_map.Find(old_start_address) |
| if not code: |
| print("Warning: Not found %x" % old_start_address, file=sys.stderr) |
| continue |
| assert code.start_address == old_start_address, \ |
| "Inexact move address %x for %s" % (old_start_address, code) |
| self.code_map.Remove(code) |
| size = code.end_address - code.start_address |
| code.start_address = new_start_address |
| code.end_address = new_start_address + size |
| self.code_map.Add(code) |
| continue |
| |
| assert False, "Unknown tag %s" % tag |
| |
| def Dispose(self): |
| self.log.close() |
| self.log_file.close() |
| |
| @staticmethod |
| def _DefineStruct(fields): |
| class Struct(ctypes.Structure): |
| _fields_ = fields |
| return Struct |
| |
| @staticmethod |
| def _HandleCodeConflict(old_code, new_code): |
| assert (old_code.start_address == new_code.start_address and |
| old_code.end_address == new_code.end_address), \ |
| "Conficting code log entries %s and %s" % (old_code, new_code) |
| if old_code.name == new_code.name: |
| return |
| # Code object may be shared by a few functions. Collect the full |
| # set of names. |
| old_code.AddName(new_code.name) |
| |
| |
| class Descriptor(object): |
| """Descriptor of a structure in the binary trace log.""" |
| |
| CTYPE_MAP = { |
| "u16": ctypes.c_uint16, |
| "u32": ctypes.c_uint32, |
| "u64": ctypes.c_uint64 |
| } |
| |
| def __init__(self, fields): |
| class TraceItem(ctypes.Structure): |
| _fields_ = Descriptor.CtypesFields(fields) |
| |
| def __str__(self): |
| return ", ".join("%s: %s" % (field, self.__getattribute__(field)) |
| for field, _ in TraceItem._fields_) |
| |
| self.ctype = TraceItem |
| |
| def Read(self, trace, offset): |
| return self.ctype.from_buffer(trace, offset) |
| |
| @staticmethod |
| def CtypesFields(fields): |
| return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields] |
| |
| |
| # Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf |
| # for the gory details. |
| |
| |
| # Reference: struct perf_file_header in kernel/tools/perf/util/header.h |
| TRACE_HEADER_DESC = Descriptor([ |
| ("magic", "u64"), |
| ("size", "u64"), |
| ("attr_size", "u64"), |
| ("attrs_offset", "u64"), |
| ("attrs_size", "u64"), |
| ("data_offset", "u64"), |
| ("data_size", "u64"), |
| ("event_types_offset", "u64"), |
| ("event_types_size", "u64") |
| ]) |
| |
| |
| # Reference: /usr/include/linux/perf_event.h |
| PERF_EVENT_ATTR_DESC = Descriptor([ |
| ("type", "u32"), |
| ("size", "u32"), |
| ("config", "u64"), |
| ("sample_period_or_freq", "u64"), |
| ("sample_type", "u64"), |
| ("read_format", "u64"), |
| ("flags", "u64"), |
| ("wakeup_events_or_watermark", "u32"), |
| ("bp_type", "u32"), |
| ("bp_addr", "u64"), |
| ("bp_len", "u64") |
| ]) |
| |
| |
| # Reference: /usr/include/linux/perf_event.h |
| PERF_EVENT_HEADER_DESC = Descriptor([ |
| ("type", "u32"), |
| ("misc", "u16"), |
| ("size", "u16") |
| ]) |
| |
| |
| # Reference: kernel/tools/perf/util/event.h |
| PERF_MMAP_EVENT_BODY_DESC = Descriptor([ |
| ("pid", "u32"), |
| ("tid", "u32"), |
| ("addr", "u64"), |
| ("len", "u64"), |
| ("pgoff", "u64") |
| ]) |
| |
| # Reference: kernel/tools/perf/util/event.h |
| PERF_MMAP2_EVENT_BODY_DESC = Descriptor([ |
| ("pid", "u32"), |
| ("tid", "u32"), |
| ("addr", "u64"), |
| ("len", "u64"), |
| ("pgoff", "u64"), |
| ("maj", "u32"), |
| ("min", "u32"), |
| ("ino", "u64"), |
| ("ino_generation", "u64"), |
| ("prot", "u32"), |
| ("flags","u32") |
| ]) |
| |
| # perf_event_attr.sample_type bits control the set of |
| # perf_sample_event fields. |
| PERF_SAMPLE_IP = 1 << 0 |
| PERF_SAMPLE_TID = 1 << 1 |
| PERF_SAMPLE_TIME = 1 << 2 |
| PERF_SAMPLE_ADDR = 1 << 3 |
| PERF_SAMPLE_READ = 1 << 4 |
| PERF_SAMPLE_CALLCHAIN = 1 << 5 |
| PERF_SAMPLE_ID = 1 << 6 |
| PERF_SAMPLE_CPU = 1 << 7 |
| PERF_SAMPLE_PERIOD = 1 << 8 |
| PERF_SAMPLE_STREAM_ID = 1 << 9 |
| PERF_SAMPLE_RAW = 1 << 10 |
| |
| |
| # Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE. |
| PERF_SAMPLE_EVENT_BODY_FIELDS = [ |
| ("ip", "u64", PERF_SAMPLE_IP), |
| ("pid", "u32", PERF_SAMPLE_TID), |
| ("tid", "u32", PERF_SAMPLE_TID), |
| ("time", "u64", PERF_SAMPLE_TIME), |
| ("addr", "u64", PERF_SAMPLE_ADDR), |
| ("id", "u64", PERF_SAMPLE_ID), |
| ("stream_id", "u64", PERF_SAMPLE_STREAM_ID), |
| ("cpu", "u32", PERF_SAMPLE_CPU), |
| ("res", "u32", PERF_SAMPLE_CPU), |
| ("period", "u64", PERF_SAMPLE_PERIOD), |
| # Don't want to handle read format that comes after the period and |
| # before the callchain and has variable size. |
| ("nr", "u64", PERF_SAMPLE_CALLCHAIN) |
| # Raw data follows the callchain and is ignored. |
| ] |
| |
| |
| PERF_SAMPLE_EVENT_IP_FORMAT = "u64" |
| |
| |
| PERF_RECORD_MMAP = 1 |
| PERF_RECORD_MMAP2 = 10 |
| PERF_RECORD_SAMPLE = 9 |
| |
| |
| class TraceReader(object): |
| """Perf (linux-2.6/tools/perf) trace file reader.""" |
| |
| _TRACE_HEADER_MAGIC = 4993446653023372624 |
| |
| def __init__(self, trace_name): |
| self.trace_file = open(trace_name, "r") |
| self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE) |
| self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0) |
| if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC: |
| print("Warning: unsupported trace header magic", file=sys.stderr) |
| self.offset = self.trace_header.data_offset |
| self.limit = self.trace_header.data_offset + self.trace_header.data_size |
| assert self.limit <= self.trace.size(), \ |
| "Trace data limit exceeds trace file size" |
| self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype) |
| assert self.trace_header.attrs_size != 0, \ |
| "No perf event attributes found in the trace" |
| perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace, |
| self.trace_header.attrs_offset) |
| self.sample_event_body_desc = self._SampleEventBodyDesc( |
| perf_event_attr.sample_type) |
| self.callchain_supported = \ |
| (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 |
| if self.callchain_supported: |
| self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT] |
| self.ip_size = ctypes.sizeof(self.ip_struct) |
| |
| def ReadEventHeader(self): |
| if self.offset >= self.limit: |
| return None, 0 |
| offset = self.offset |
| header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset) |
| self.offset += header.size |
| return header, offset |
| |
| def ReadMmap(self, header, offset): |
| mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace, |
| offset + self.header_size) |
| # Read null-terminated filename. |
| filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info): |
| offset + header.size] |
| mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))] |
| return mmap_info |
| |
| def ReadMmap2(self, header, offset): |
| mmap_info = PERF_MMAP2_EVENT_BODY_DESC.Read(self.trace, |
| offset + self.header_size) |
| # Read null-terminated filename. |
| filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info): |
| offset + header.size] |
| mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))] |
| return mmap_info |
| |
| def ReadSample(self, header, offset): |
| sample = self.sample_event_body_desc.Read(self.trace, |
| offset + self.header_size) |
| if not self.callchain_supported: |
| return sample |
| sample.ips = [] |
| offset += self.header_size + ctypes.sizeof(sample) |
| for _ in range(sample.nr): |
| sample.ips.append( |
| self.ip_struct.from_buffer(self.trace, offset).value) |
| offset += self.ip_size |
| return sample |
| |
| def Dispose(self): |
| self.trace.close() |
| self.trace_file.close() |
| |
| def _SampleEventBodyDesc(self, sample_type): |
| assert (sample_type & PERF_SAMPLE_READ) == 0, \ |
| "Can't hande read format in samples" |
| fields = [(field, format) |
| for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS |
| if (bit & sample_type) != 0] |
| return Descriptor(fields) |
| |
| |
| OBJDUMP_SECTION_HEADER_RE = re.compile( |
| r"^\s*\d+\s(\.\S+)\s+[a-f0-9]") |
| OBJDUMP_SYMBOL_LINE_RE = re.compile( |
| r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$") |
| OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile( |
| r"^DYNAMIC SYMBOL TABLE") |
| OBJDUMP_SKIP_RE = re.compile( |
| r"^.*ld\.so\.cache$") |
| KERNEL_ALLSYMS_FILE = "/proc/kallsyms" |
| PERF_KERNEL_ALLSYMS_RE = re.compile( |
| r".*kallsyms.*") |
| KERNEL_ALLSYMS_LINE_RE = re.compile( |
| r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$") |
| |
| |
| class LibraryRepo(object): |
| def __init__(self): |
| self.infos = [] |
| self.names = set() |
| self.ticks = {} |
| |
| |
| def HasDynamicSymbols(self, filename): |
| if filename.endswith(".ko"): return False |
| process = subprocess.Popen( |
| "%s -h %s" % (OBJDUMP_BIN, filename), |
| shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) |
| pipe = process.stdout |
| try: |
| for line in pipe: |
| match = OBJDUMP_SECTION_HEADER_RE.match(line) |
| if match and match.group(1) == 'dynsym': return True |
| finally: |
| pipe.close() |
| assert process.wait() == 0, "Failed to objdump -h %s" % filename |
| return False |
| |
| |
| def Load(self, mmap_info, code_map, options): |
| # Skip kernel mmaps when requested using the fact that their tid |
| # is 0. |
| if mmap_info.tid == 0 and not options.kernel: |
| return True |
| if OBJDUMP_SKIP_RE.match(mmap_info.filename): |
| return True |
| if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename): |
| return self._LoadKernelSymbols(code_map) |
| self.infos.append(mmap_info) |
| mmap_info.ticks = 0 |
| mmap_info.unique_name = self._UniqueMmapName(mmap_info) |
| if not os.path.exists(mmap_info.filename): |
| return True |
| # Request section headers (-h), symbols (-t), and dynamic symbols |
| # (-T) from objdump. |
| # Unfortunately, section headers span two lines, so we have to |
| # keep the just seen section name (from the first line in each |
| # section header) in the after_section variable. |
| if self.HasDynamicSymbols(mmap_info.filename): |
| dynamic_symbols = "-T" |
| else: |
| dynamic_symbols = "" |
| process = subprocess.Popen( |
| "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename), |
| shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) |
| pipe = process.stdout |
| after_section = None |
| code_sections = set() |
| reloc_sections = set() |
| dynamic = False |
| try: |
| for line in pipe: |
| if after_section: |
| if line.find("CODE") != -1: |
| code_sections.add(after_section) |
| if line.find("RELOC") != -1: |
| reloc_sections.add(after_section) |
| after_section = None |
| continue |
| |
| match = OBJDUMP_SECTION_HEADER_RE.match(line) |
| if match: |
| after_section = match.group(1) |
| continue |
| |
| if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line): |
| dynamic = True |
| continue |
| |
| match = OBJDUMP_SYMBOL_LINE_RE.match(line) |
| if match: |
| start_address = int(match.group(1), 16) |
| origin_offset = start_address |
| flags = match.group(2) |
| section = match.group(3) |
| if section in code_sections: |
| if dynamic or section in reloc_sections: |
| start_address += mmap_info.addr |
| size = int(match.group(4), 16) |
| name = match.group(5) |
| origin = mmap_info.filename |
| code_map.Add(Code(name, start_address, start_address + size, |
| origin, origin_offset)) |
| finally: |
| pipe.close() |
| assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename |
| |
| def Tick(self, pc): |
| for i, mmap_info in enumerate(self.infos): |
| if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len): |
| mmap_info.ticks += 1 |
| self.infos[0], self.infos[i] = mmap_info, self.infos[0] |
| return True |
| return False |
| |
| def _UniqueMmapName(self, mmap_info): |
| name = mmap_info.filename |
| index = 1 |
| while name in self.names: |
| name = "%s-%d" % (mmap_info.filename, index) |
| index += 1 |
| self.names.add(name) |
| return name |
| |
| def _LoadKernelSymbols(self, code_map): |
| if not os.path.exists(KERNEL_ALLSYMS_FILE): |
| print("Warning: %s not found" % KERNEL_ALLSYMS_FILE, file=sys.stderr) |
| return False |
| kallsyms = open(KERNEL_ALLSYMS_FILE, "r") |
| code = None |
| for line in kallsyms: |
| match = KERNEL_ALLSYMS_LINE_RE.match(line) |
| if match: |
| start_address = int(match.group(1), 16) |
| end_address = start_address |
| name = match.group(2) |
| if code: |
| code.end_address = start_address |
| code_map.Add(code, 16) |
| code = Code(name, start_address, end_address, "kernel", 0) |
| return True |
| |
| |
| def PrintReport(code_map, library_repo, arch, ticks, options): |
| print("Ticks per symbol:") |
| used_code = [code for code in code_map.UsedCode()] |
| used_code.sort(key=lambda x: x.self_ticks, reverse=True) |
| for i, code in enumerate(used_code): |
| code_ticks = code.self_ticks |
| print("%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks, |
| code.FullName(), code.origin)) |
| if options.disasm_all or i < options.disasm_top: |
| code.PrintAnnotated(arch, options) |
| print() |
| print("Ticks per library:") |
| mmap_infos = [m for m in library_repo.infos if m.ticks > 0] |
| mmap_infos.sort(key=lambda m: m.ticks, reverse=True) |
| for mmap_info in mmap_infos: |
| mmap_ticks = mmap_info.ticks |
| print("%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks, |
| mmap_info.unique_name)) |
| |
| |
| def PrintDot(code_map, options): |
| print("digraph G {") |
| for code in code_map.UsedCode(): |
| if code.self_ticks < 10: |
| continue |
| print("n%d [shape=box,label=\"%s\"];" % (code.id, code.name)) |
| if code.callee_ticks: |
| for callee, ticks in code.callee_ticks.iteritems(): |
| print("n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)) |
| print("}") |
| |
| |
| if __name__ == "__main__": |
| parser = optparse.OptionParser(USAGE) |
| parser.add_option("--log", |
| default="v8.log", |
| help="V8 log file name [default: %default]") |
| parser.add_option("--trace", |
| default="perf.data", |
| help="perf trace file name [default: %default]") |
| parser.add_option("--kernel", |
| default=False, |
| action="store_true", |
| help="process kernel entries [default: %default]") |
| parser.add_option("--disasm-top", |
| default=0, |
| type="int", |
| help=("number of top symbols to disassemble and annotate " |
| "[default: %default]")) |
| parser.add_option("--disasm-all", |
| default=False, |
| action="store_true", |
| help=("disassemble and annotate all used symbols " |
| "[default: %default]")) |
| parser.add_option("--dot", |
| default=False, |
| action="store_true", |
| help="produce dot output (WIP) [default: %default]") |
| parser.add_option("--quiet", "-q", |
| default=False, |
| action="store_true", |
| help="no auxiliary messages [default: %default]") |
| parser.add_option("--gc-fake-mmap", |
| default="/tmp/__v8_gc__", |
| help="gc fake mmap file [default: %default]") |
| parser.add_option("--objdump", |
| default="/usr/bin/objdump", |
| help="objdump tool to use [default: %default]") |
| parser.add_option("--host-root", |
| default="", |
| help="Path to the host root [default: %default]") |
| options, args = parser.parse_args() |
| |
| if not options.quiet: |
| print("V8 log: %s, %s.ll" % (options.log, options.log)) |
| print("Perf trace file: %s" % options.trace) |
| |
| V8_GC_FAKE_MMAP = options.gc_fake_mmap |
| HOST_ROOT = options.host_root |
| if os.path.exists(options.objdump): |
| disasm.OBJDUMP_BIN = options.objdump |
| OBJDUMP_BIN = options.objdump |
| else: |
| print("Cannot find %s, falling back to default objdump" % options.objdump) |
| |
| # Stats. |
| events = 0 |
| ticks = 0 |
| missed_ticks = 0 |
| really_missed_ticks = 0 |
| optimized_ticks = 0 |
| generated_ticks = 0 |
| v8_internal_ticks = 0 |
| mmap_time = 0 |
| sample_time = 0 |
| |
| # Initialize the log reader. |
| code_map = CodeMap() |
| log_reader = LogReader(log_name=options.log + ".ll", |
| code_map=code_map) |
| if not options.quiet: |
| print("Generated code architecture: %s" % log_reader.arch) |
| print() |
| sys.stdout.flush() |
| |
| # Process the code and trace logs. |
| library_repo = LibraryRepo() |
| log_reader.ReadUpToGC() |
| trace_reader = TraceReader(options.trace) |
| while True: |
| header, offset = trace_reader.ReadEventHeader() |
| if not header: |
| break |
| events += 1 |
| if header.type == PERF_RECORD_MMAP: |
| start = time.time() |
| mmap_info = trace_reader.ReadMmap(header, offset) |
| if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP: |
| log_reader.ReadUpToGC() |
| else: |
| library_repo.Load(mmap_info, code_map, options) |
| mmap_time += time.time() - start |
| elif header.type == PERF_RECORD_MMAP2: |
| start = time.time() |
| mmap_info = trace_reader.ReadMmap2(header, offset) |
| if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP: |
| log_reader.ReadUpToGC() |
| else: |
| library_repo.Load(mmap_info, code_map, options) |
| mmap_time += time.time() - start |
| elif header.type == PERF_RECORD_SAMPLE: |
| ticks += 1 |
| start = time.time() |
| sample = trace_reader.ReadSample(header, offset) |
| code = code_map.Find(sample.ip) |
| if code: |
| code.Tick(sample.ip) |
| if code.codetype == Code.OPTIMIZED: |
| optimized_ticks += 1 |
| elif code.codetype == Code.FULL_CODEGEN: |
| generated_ticks += 1 |
| elif code.codetype == Code.V8INTERNAL: |
| v8_internal_ticks += 1 |
| else: |
| missed_ticks += 1 |
| if not library_repo.Tick(sample.ip) and not code: |
| really_missed_ticks += 1 |
| if trace_reader.callchain_supported: |
| for ip in sample.ips: |
| caller_code = code_map.Find(ip) |
| if caller_code: |
| if code: |
| caller_code.CalleeTick(code) |
| code = caller_code |
| sample_time += time.time() - start |
| |
| if options.dot: |
| PrintDot(code_map, options) |
| else: |
| PrintReport(code_map, library_repo, log_reader.arch, ticks, options) |
| |
| if not options.quiet: |
| def PrintTicks(number, total, description): |
| print("%10d %5.1f%% ticks in %s" % |
| (number, 100.0*number/total, description)) |
| print() |
| print("Stats:") |
| print("%10d total trace events" % events) |
| print("%10d total ticks" % ticks) |
| print("%10d ticks not in symbols" % missed_ticks) |
| unaccounted = "unaccounted ticks" |
| if really_missed_ticks > 0: |
| unaccounted += " (probably in the kernel, try --kernel)" |
| PrintTicks(really_missed_ticks, ticks, unaccounted) |
| PrintTicks(optimized_ticks, ticks, "ticks in optimized code") |
| PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code") |
| PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*") |
| print("%10d total symbols" % len([c for c in code_map.AllCode()])) |
| print("%10d used symbols" % len([c for c in code_map.UsedCode()])) |
| print("%9.2fs library processing time" % mmap_time) |
| print("%9.2fs tick processing time" % sample_time) |
| |
| log_reader.Dispose() |
| trace_reader.Dispose() |