src/v8/tools/ll_prof.py - cobalt - Git at Google

 #!/usr/bin/env python
 #
 # Copyright 2012 the V8 project authors. All rights reserved.
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met:
 #
 #     * Redistributions of source code must retain the above copyright
 #       notice, this list of conditions and the following disclaimer.
 #     * Redistributions in binary form must reproduce the above
 #       copyright notice, this list of conditions and the following
 #       disclaimer in the documentation and/or other materials provided
 #       with the distribution.
 #     * Neither the name of Google Inc. nor the names of its
 #       contributors may be used to endorse or promote products derived
 #       from this software without specific prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


 # for py2/py3 compatibility
 from __future__ import print_function

 import bisect
 import collections
 import ctypes
 import disasm
 import mmap
 import optparse
 import os
 import re
 import subprocess
 import sys
 import time


 # Usage/help text handed to optparse.OptionParser in the script entry point.
 # optparse substitutes "%prog" with the program name when printing it.
 USAGE="""usage: %prog [OPTION]...

 Analyses V8 and perf logs to produce profiles.

 Perf logs can be collected using a command like:
   $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
   # -R: collect all data
   # -e cycles: use cpu-cycles event (run "perf list" for details)
   # -c 10000: write a sample after each 10000 events
   # -f: force output file overwrite
   # -i: limit profiling to our process and the kernel
   # --ll-prof shell flag enables the right V8 logs
 This will produce a binary trace file (perf.data) that %prog can analyse.

 IMPORTANT:
   The kernel has an internal maximum for events per second, it is 100K by
   default. That's not enough for "-c 10000". Set it to some higher value:
   $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
   You can also make the warning about kernel address maps go away:
   $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict

 We have a convenience script that handles all of the above for you:
   $ tools/run-llprof.sh ./d8 bench.js

 Examples:
   # Print flat profile with annotated disassembly for the 10 top
   # symbols. Use default log names.
   $ %prog --disasm-top=10

   # Print flat profile with annotated disassembly for all used symbols.
   # Use default log names and include kernel symbols into analysis.
   $ %prog --disasm-all --kernel

   # Print flat profile. Use custom log names.
   $ %prog --log=foo.log --trace=foo.data
 """


 # Value of Code.origin for code objects read from the V8 low-level log.
 # Code with this origin is disassembled from the "<log>.ll" file; any other
 # origin is treated as the name of the file that contains the code.
 JS_ORIGIN = "js"


 class Code(object):
   """A named range of machine code plus the profile ticks attributed to it.

   Every instance receives a unique, monotonically increasing `id` taken from
   the class-level counter.
   """

   _id = 0  # Next id to hand out; shared by all instances.

   # Possible values of `codetype`; chosen from the name prefix in __init__.
   UNKNOWN = 0
   V8INTERNAL = 1
   FULL_CODEGEN = 2
   OPTIMIZED = 3

   def __init__(self, name, start_address, end_address, origin, origin_offset):
     """Creates a code object covering [start_address, end_address).

     Args:
       name: symbol or function name; its prefix selects `codetype`.
       start_address: first address belonging to the code.
       end_address: address one past the last byte of the code.
       origin: JS_ORIGIN, or the name of the file the code lives in.
       origin_offset: offset handed to the disassembler to locate the code.
     """
     self.id = Code._id
     Code._id += 1
     self.name = name
     self.other_names = None
     self.start_address = start_address
     self.end_address = end_address
     self.origin = origin
     self.origin_offset = origin_offset
     self.self_ticks = 0
     # Both maps are created lazily on the first tick to keep unused code
     # objects small.
     self.self_ticks_map = None
     self.callee_ticks = None
     # The more specific "LazyCompile:*" prefix has to be tested first.
     if name.startswith("LazyCompile:*"):
       self.codetype = Code.OPTIMIZED
     elif name.startswith("LazyCompile:"):
       self.codetype = Code.FULL_CODEGEN
     elif name.startswith("v8::internal::"):
       self.codetype = Code.V8INTERNAL
     else:
       self.codetype = Code.UNKNOWN

   def AddName(self, name):
     """Records an additional name for this code; duplicates are ignored."""
     assert self.name != name
     if self.other_names is None:
       self.other_names = [name]
     elif name not in self.other_names:
       self.other_names.append(name)

   def FullName(self):
     """Returns the primary name followed by the sorted alternative names."""
     if self.other_names is None:
       return self.name
     self.other_names.sort()
     return "%s (aka %s)" % (self.name, ", ".join(self.other_names))

   def IsUsed(self):
     """Returns True if the code was sampled itself or called something."""
     return self.self_ticks > 0 or self.callee_ticks is not None

   def Tick(self, pc):
     """Accounts one sample taken at address `pc` inside this code."""
     self.self_ticks += 1
     if self.self_ticks_map is None:
       self.self_ticks_map = collections.defaultdict(int)
     self.self_ticks_map[pc - self.start_address] += 1

   def CalleeTick(self, callee):
     """Accounts one sample in which this code was a caller of `callee`."""
     if self.callee_ticks is None:
       self.callee_ticks = collections.defaultdict(int)
     self.callee_ticks[callee] += 1

   def PrintAnnotated(self, arch, options):
     """Prints the disassembly of this code annotated with tick counts.

     Args:
       arch: architecture name passed on to the disassembler.
       options: parsed command line options; `options.log` is read when the
         code originates from the V8 log.

     Raises:
       AssertionError: if some ticks could not be attributed to any line.
     """
     # sorted() accepts both the Python 2 list and the Python 3 view returned
     # by items(), unlike calling .sort() on the result.
     if self.self_ticks_map is None:
       ticks_map = []
     else:
       ticks_map = sorted(self.self_ticks_map.items(), key=lambda t: t[0])
     # Split into parallel offset/count arrays so that the offsets can be
     # searched with bisect.
     ticks_offsets = [t[0] for t in ticks_map]
     ticks_counts = [t[1] for t in ticks_map]
     # Get a list of disassembled lines and their addresses.
     lines = self._GetDisasmLines(arch, options)
     if not lines:
       return
     base_address = lines[0][0]
     code_size = self.end_address - self.start_address
     last_index = len(lines) - 1
     total_count = 0
     for i, line in enumerate(lines):
       start_offset = line[0] - base_address
       if i == last_index:
         end_offset = code_size
       else:
         end_offset = lines[i + 1][0] - base_address
       # Ticks (reported pc values) are not always precise, i.e. they do not
       # necessarily point at instruction starts. So every tick whose offset
       # falls into [start_offset, end_offset) belongs to this line.
       first = bisect.bisect_left(ticks_offsets, start_offset)
       last = bisect.bisect_left(ticks_offsets, end_offset)
       count = sum(ticks_counts[first:last])
       total_count += count
       # Code that was only seen as a caller has no self ticks at all; avoid
       # dividing by zero for it.
       if self.self_ticks:
         percent = 100.0 * count / self.self_ticks
       else:
         percent = 0.0
       address = line[0]
       if percent >= 0.01:
         # 5 spaces for tick count
         # 1 space following
         # 1 for '|'
         # 1 space following
         # 6 for the percentage number, incl. the '.'
         # 1 for the '%' sign
         # => 15
         print("%5d | %6.2f%% %x(%d): %s" %
               (count, percent, address, address, line[1]))
       else:
         print("%s %x(%d): %s" % (" " * 15, address, address, line[1]))
     print()
     assert total_count == self.self_ticks, \
         "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)

   def __str__(self):
     return "%s [0x%x, 0x%x) size: %d origin: %s" % (
       self.name,
       self.start_address,
       self.end_address,
       self.end_address - self.start_address,
       self.origin)

   def _GetDisasmLines(self, arch, options):
     """Returns the disassembled lines of this code from the disasm module."""
     if self.origin == JS_ORIGIN:
       # Code from the V8 log is stored in the "<log>.ll" file.
       inplace = False
       filename = options.log + ".ll"
     else:
       inplace = True
       filename = self.origin
     return disasm.GetDisasmLines(filename,
                                  self.origin_offset,
                                  self.end_address - self.start_address,
                                  arch,
                                  inplace)


 class CodePage(object):
   """Bucket holding the code objects that touch one 1M address page."""

   SHIFT = 20  # 1M pages
   SIZE = (1 << SHIFT)
   MASK = ~(SIZE - 1)

   @staticmethod
   def PageAddress(address):
     """Rounds `address` down to the start of its page."""
     return address & CodePage.MASK

   @staticmethod
   def PageId(address):
     """Returns the index of the page containing `address`."""
     return address >> CodePage.SHIFT

   @staticmethod
   def PageAddressFromId(id):
     """Returns the start address of the page with index `id`."""
     return id << CodePage.SHIFT

   def __init__(self, address):
     self.address = address
     self.code_objects = []

   def Add(self, code):
     """Appends `code` to this page."""
     self.code_objects.append(code)

   def Remove(self, code):
     """Drops `code` from this page; list.remove raises if it is absent."""
     self.code_objects.remove(code)

   def Find(self, pc):
     """Returns the code object containing `pc`, or None.

     A hit is swapped with the first entry so that repeated lookups of hot
     code terminate early.
     """
     objects = self.code_objects
     for index in range(len(objects)):
       candidate = objects[index]
       if not (candidate.start_address <= pc < candidate.end_address):
         continue
       objects[index] = objects[0]
       objects[0] = candidate
       return candidate
     return None

   def __iter__(self):
     return iter(self.code_objects)


 class CodeMap(object):
   """Map from addresses to code objects, indexed by CodePage.

   A code object is registered on every page its address range touches, so a
   lookup only has to scan the single page that contains the address.
   """

   def __init__(self):
     self.pages = {}
     # Bounds of all code ever added; used to reject lookups quickly.
     self.min_address = 1 << 64
     self.max_address = -1

   def Add(self, code, max_pages=-1):
     """Registers `code` on the pages covered by its address range.

     Args:
       code: object with start_address, end_address, name and origin.
       max_pages: when non-negative, stop registering (with a warning on
         stderr) once more than this many pages have been filled.
     """
     page_id = CodePage.PageId(code.start_address)
     limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
     pages = 0
     while page_id < limit_id:
       if max_pages >= 0 and pages > max_pages:
         print("Warning: page limit (%d) reached for %s [%s]" % (
             max_pages, code.name, code.origin), file=sys.stderr)
         break
       page = self.pages.get(page_id)
       if page is None:
         page = CodePage(CodePage.PageAddressFromId(page_id))
         self.pages[page_id] = page
       page.Add(code)
       page_id += 1
       pages += 1
     self.min_address = min(self.min_address, code.start_address)
     self.max_address = max(self.max_address, code.end_address)

   def Remove(self, code):
     """Unregisters `code` from its pages.

     Returns:
       True if the code was removed from at least one existing page.
     """
     page_id = CodePage.PageId(code.start_address)
     limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
     removed = False
     while page_id < limit_id:
       page = self.pages.get(page_id)
       if page is not None:
         page.Remove(code)
         removed = True
       page_id += 1
     return removed

   def AllCode(self):
     """Yields every registered code object exactly once."""
     # values() exists on both Python 2 and 3, unlike itervalues().
     for page in self.pages.values():
       for code in page:
         # Code spanning several pages is listed on each of them; report it
         # only from the page that contains its start address.
         if CodePage.PageAddress(code.start_address) == page.address:
           yield code

   def UsedCode(self):
     """Yields the code objects for which IsUsed() is true."""
     for code in self.AllCode():
       if code.IsUsed():
         yield code

   def Print(self):
     """Prints every registered code object."""
     for code in self.AllCode():
       print(code)

   def Find(self, pc):
     """Returns the code object containing address `pc`, or None."""
     if pc < self.min_address or pc >= self.max_address:
       return None
     page = self.pages.get(CodePage.PageId(pc))
     if page is None:
       return None
     return page.Find(pc)


 class CodeInfo(object):
   """Plain holder for properties shared by generated code objects."""

   def __init__(self, arch, header_size):
     """Stores `arch` and `header_size` unchanged as attributes."""
     self.arch, self.header_size = arch, header_size


 class LogReader(object):
   """V8 low-level (binary) log reader.

   The log starts with a NUL-terminated architecture name and continues with
   records that each begin with a one-byte tag:
     "C": code creation - a fixed header (name size, code address, code
          size) followed by the name bytes and then the code bytes;
     "M": code move - the old and the new start address;
     "G": marker at which ReadUpToGC() stops.
   """

   _ARCH_TO_POINTER_TYPE_MAP = {
     "ia32": ctypes.c_uint32,
     "arm": ctypes.c_uint32,
     "mips": ctypes.c_uint32,
     "x64": ctypes.c_uint64,
     "arm64": ctypes.c_uint64
   }

   _CODE_CREATE_TAG = "C"
   _CODE_MOVE_TAG = "M"
   _CODE_MOVING_GC_TAG = "G"

   def __init__(self, log_name, code_map):
     """Maps the log file and reads the architecture header.

     Args:
       log_name: path of the low-level log file.
       code_map: map that receives the code objects found in the log.

     Raises:
       AssertionError: if the log names an unsupported architecture.
     """
     # Binary mode: the content is accessed only through the mmap.
     self.log_file = open(log_name, "rb")
     self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
     self.log_pos = 0
     self.code_map = code_map

     # On Python 3 mmap content is bytes, so search for a bytes NUL and
     # convert the result to a native string.
     raw_arch = self.log[:self.log.find(b"\0")]
     self.arch = LogReader._ToStr(raw_arch)
     self.log_pos += len(raw_arch) + 1
     assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
         "Unsupported architecture %s" % self.arch
     pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]

     self.code_create_struct = LogReader._DefineStruct([
         ("name_size", ctypes.c_int32),
         ("code_address", pointer_type),
         ("code_size", ctypes.c_int32)])

     self.code_move_struct = LogReader._DefineStruct([
         ("from_address", pointer_type),
         ("to_address", pointer_type)])

     self.code_delete_struct = LogReader._DefineStruct([
         ("address", pointer_type)])

   def ReadUpToGC(self):
     """Applies log records to the code map up to the next "G" marker.

     Returns when the marker has been consumed or the log is exhausted.

     Raises:
       AssertionError: on an unknown record tag or an inexact code move.
     """
     while self.log_pos < self.log.size():
       # Slice instead of index: indexing an mmap yields an int on Python 3.
       tag = LogReader._ToStr(self.log[self.log_pos:self.log_pos + 1])
       self.log_pos += 1

       if tag == LogReader._CODE_MOVING_GC_TAG:
         return
       if tag == LogReader._CODE_CREATE_TAG:
         self._ReadCodeCreate()
         continue
       if tag == LogReader._CODE_MOVE_TAG:
         self._ReadCodeMove()
         continue

       assert False, "Unknown tag %s" % tag

   def _ReadCodeCreate(self):
     """Consumes one code creation record and registers its code object."""
     event = self.code_create_struct.from_buffer(self.log, self.log_pos)
     self.log_pos += ctypes.sizeof(event)
     start_address = event.code_address
     end_address = start_address + event.code_size
     name = LogReader._ToStr(
         self.log[self.log_pos:self.log_pos + event.name_size])
     self.log_pos += event.name_size
     # The code bytes follow the name; remember where they start so that
     # they can be disassembled from the log file later.
     origin_offset = self.log_pos
     self.log_pos += event.code_size
     code = Code(name, start_address, end_address, JS_ORIGIN, origin_offset)
     conflicting_code = self.code_map.Find(start_address)
     if conflicting_code:
       same_range = (conflicting_code.start_address == code.start_address and
                     conflicting_code.end_address == code.end_address)
       if same_range:
         # Duplicate entry for the same range: only collect the extra name.
         # TODO(vitalyr): a warning here is too noisy because of our
         # attempts to reconstruct code log from the snapshot.
         LogReader._HandleCodeConflict(conflicting_code, code)
         return
       # The old code occupied a different range; the new one replaces it.
       self.code_map.Remove(conflicting_code)
     self.code_map.Add(code)

   def _ReadCodeMove(self):
     """Consumes one code move record and relocates the affected code."""
     event = self.code_move_struct.from_buffer(self.log, self.log_pos)
     self.log_pos += ctypes.sizeof(event)
     old_start_address = event.from_address
     new_start_address = event.to_address
     if old_start_address == new_start_address:
       # Skip useless code move entries.
       return
     code = self.code_map.Find(old_start_address)
     if not code:
       print("Warning: Not found %x" % old_start_address, file=sys.stderr)
       return
     assert code.start_address == old_start_address, \
         "Inexact move address %x for %s" % (old_start_address, code)
     # Re-register the code because its page membership may have changed.
     self.code_map.Remove(code)
     size = code.end_address - code.start_address
     code.start_address = new_start_address
     code.end_address = new_start_address + size
     self.code_map.Add(code)

   def Dispose(self):
     """Closes the memory map and the underlying file."""
     self.log.close()
     self.log_file.close()

   @staticmethod
   def _ToStr(data):
     """Returns `data` as a native str, decoding it if it is bytes."""
     if isinstance(data, str):
       return data
     return data.decode("utf-8", "replace")

   @staticmethod
   def _DefineStruct(fields):
     """Returns a new ctypes.Structure subclass with the given fields."""
     class Struct(ctypes.Structure):
       _fields_ = fields
     return Struct

   @staticmethod
   def _HandleCodeConflict(old_code, new_code):
     """Merges the name of `new_code` into `old_code` (same address range)."""
     assert (old_code.start_address == new_code.start_address and
             old_code.end_address == new_code.end_address), \
         "Conficting code log entries %s and %s" % (old_code, new_code)
     if old_code.name == new_code.name:
       return
     # Code object may be shared by a few functions. Collect the full
     # set of names.
     old_code.AddName(new_code.name)


 class Descriptor(object):
   """Descriptor of a structure in the binary trace log.

   Wraps a ctypes.Structure subclass (available as `ctype`) generated from a
   list of (field name, format) pairs, where format is a CTYPE_MAP key.
   """

   CTYPE_MAP = {
     "u16": ctypes.c_uint16,
     "u32": ctypes.c_uint32,
     "u64": ctypes.c_uint64
   }

   def __init__(self, fields):
     """Builds the structure type for `fields`.

     Args:
       fields: sequence of (name, format) pairs in layout order.

     Raises:
       KeyError: if a format is not present in CTYPE_MAP.
     """
     class TraceItem(ctypes.Structure):
       _fields_ = Descriptor.CtypesFields(fields)

       def __str__(self):
         return ", ".join("%s: %s" % (field, getattr(self, field))
                          for field, _ in TraceItem._fields_)

     self.ctype = TraceItem

   def Read(self, trace, offset):
     """Returns a structure filled from `trace` starting at `offset`.

     The bytes are copied, so the result does not keep `trace` pinned. A
     structure created with from_buffer() holds an export on the buffer,
     which makes mmap.close() raise BufferError on Python 3 while the
     structure is alive, and it cannot be used with read-only buffers.
     """
     return self.ctype.from_buffer_copy(trace, offset)

   @staticmethod
   def CtypesFields(fields):
     """Translates (name, format) pairs into ctypes `_fields_` entries."""
     return [(name, Descriptor.CTYPE_MAP[kind]) for (name, kind) in fields]


 # Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
 # for the gory details.


 # Reference: struct perf_file_header in kernel/tools/perf/util/header.h
 # Layout of the file header that TraceReader reads at offset 0 of the trace.
 # TraceReader uses magic, attrs_offset, attrs_size, data_offset and
 # data_size; the remaining fields only keep the layout intact.
 TRACE_HEADER_DESC = Descriptor([
   ("magic", "u64"),
   ("size", "u64"),
   ("attr_size", "u64"),
   ("attrs_offset", "u64"),
   ("attrs_size", "u64"),
   ("data_offset", "u64"),
   ("data_size", "u64"),
   ("event_types_offset", "u64"),
   ("event_types_size", "u64")
 ])


 # Reference: /usr/include/linux/perf_event.h
 # Layout of the event attribute record that TraceReader reads at the file
 # header's attrs_offset. Only its sample_type field is consumed in this file.
 PERF_EVENT_ATTR_DESC = Descriptor([
   ("type", "u32"),
   ("size", "u32"),
   ("config", "u64"),
   ("sample_period_or_freq", "u64"),
   ("sample_type", "u64"),
   ("read_format", "u64"),
   ("flags", "u64"),
   ("wakeup_events_or_watermark", "u32"),
   ("bp_type", "u32"),
   ("bp_addr", "u64"),
   ("bp_len", "u64")
 ])


 # Reference: /usr/include/linux/perf_event.h
 # Header that precedes every event in the trace data section. `type` is
 # compared against the PERF_RECORD_* constants and `size` is the amount by
 # which TraceReader advances to reach the next event.
 PERF_EVENT_HEADER_DESC = Descriptor([
   ("type", "u32"),
   ("misc", "u16"),
   ("size", "u16")
 ])


 # Reference: kernel/tools/perf/util/event.h
 # Fixed part of a PERF_RECORD_MMAP body. TraceReader reads the mapped file
 # name from the bytes between this structure and the end of the event.
 PERF_MMAP_EVENT_BODY_DESC = Descriptor([
   ("pid", "u32"),
   ("tid", "u32"),
   ("addr", "u64"),
   ("len", "u64"),
   ("pgoff", "u64")
 ])

 # Reference: kernel/tools/perf/util/event.h
 # Fixed part of a PERF_RECORD_MMAP2 body: the PERF_RECORD_MMAP fields
 # followed by additional fields, again followed by the mapped file name.
 PERF_MMAP2_EVENT_BODY_DESC = Descriptor([
   ("pid", "u32"),
   ("tid", "u32"),
   ("addr", "u64"),
   ("len", "u64"),
   ("pgoff", "u64"),
   ("maj", "u32"),
   ("min", "u32"),
   ("ino", "u64"),
   ("ino_generation", "u64"),
   ("prot", "u32"),
   ("flags","u32")
 ])

 # Bits of perf_event_attr.sample_type, listed in ascending bit order
 # (PERF_SAMPLE_IP is bit 0, PERF_SAMPLE_RAW is bit 10). Each bit that is set
 # adds the matching field(s) to the body of every sample event.
 (PERF_SAMPLE_IP,
  PERF_SAMPLE_TID,
  PERF_SAMPLE_TIME,
  PERF_SAMPLE_ADDR,
  PERF_SAMPLE_READ,
  PERF_SAMPLE_CALLCHAIN,
  PERF_SAMPLE_ID,
  PERF_SAMPLE_CPU,
  PERF_SAMPLE_PERIOD,
  PERF_SAMPLE_STREAM_ID,
  PERF_SAMPLE_RAW) = tuple(1 << position for position in range(11))


 # Reference: /usr/include/linux/perf_event.h, the comment for
 # PERF_RECORD_SAMPLE. Entries are (field name, format, enabling bit) in the
 # order in which the fields appear in a sample body.
 PERF_SAMPLE_EVENT_BODY_FIELDS = [
   ("ip", "u64", PERF_SAMPLE_IP),
   ("pid", "u32", PERF_SAMPLE_TID),
   ("tid", "u32", PERF_SAMPLE_TID),
   ("time", "u64", PERF_SAMPLE_TIME),
   ("addr", "u64", PERF_SAMPLE_ADDR),
   ("id", "u64", PERF_SAMPLE_ID),
   ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
   ("cpu", "u32", PERF_SAMPLE_CPU),
   ("res", "u32", PERF_SAMPLE_CPU),
   ("period", "u64", PERF_SAMPLE_PERIOD),
   # The variable-size read format would sit here, between the period and
   # the callchain; it is deliberately not handled.
   ("nr", "u64", PERF_SAMPLE_CALLCHAIN),
   # Raw data follows the callchain and is ignored.
 ]


 # Format of each instruction pointer in the callchain that follows "nr".
 PERF_SAMPLE_EVENT_IP_FORMAT = "u64"


 # Event header `type` values handled by this script.
 PERF_RECORD_MMAP, PERF_RECORD_SAMPLE, PERF_RECORD_MMAP2 = 1, 9, 10


 class TraceReader(object):
   """Perf (linux-2.6/tools/perf) trace file reader.

   Iterates over the events of the data section; the layout of sample
   bodies is derived from the sample_type found in the event attributes.
   """

   _TRACE_HEADER_MAGIC = 4993446653023372624

   def __init__(self, trace_name):
     """Maps the trace file and parses its header and event attributes.

     Raises:
       AssertionError: if the data section exceeds the file, if the trace
         has no event attributes, or if samples use the read format.
     """
     # Binary mode: the content is accessed only through the mmap.
     self.trace_file = open(trace_name, "rb")
     self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
     self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
     if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
       print("Warning: unsupported trace header magic", file=sys.stderr)
     self.offset = self.trace_header.data_offset
     self.limit = self.trace_header.data_offset + self.trace_header.data_size
     assert self.limit <= self.trace.size(), \
         "Trace data limit exceeds trace file size"
     self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
     assert self.trace_header.attrs_size != 0, \
         "No perf event attributes found in the trace"
     perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
                                                 self.trace_header.attrs_offset)
     self.sample_event_body_desc = self._SampleEventBodyDesc(
         perf_event_attr.sample_type)
     self.callchain_supported = \
         (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
     if self.callchain_supported:
       self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
       self.ip_size = ctypes.sizeof(self.ip_struct)

   def ReadEventHeader(self):
     """Reads the next event header and advances past the whole event.

     Returns:
       (header, offset of the event), or (None, 0) at the end of the data.
     """
     if self.offset >= self.limit:
       return None, 0
     offset = self.offset
     header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
     self.offset += header.size
     return header, offset

   def ReadMmap(self, header, offset):
     """Reads a PERF_RECORD_MMAP body; see _ReadMmapEvent."""
     return self._ReadMmapEvent(PERF_MMAP_EVENT_BODY_DESC, header, offset)

   def ReadMmap2(self, header, offset):
     """Reads a PERF_RECORD_MMAP2 body; see _ReadMmapEvent."""
     return self._ReadMmapEvent(PERF_MMAP2_EVENT_BODY_DESC, header, offset)

   def _ReadMmapEvent(self, body_desc, header, offset):
     """Reads an mmap event body described by `body_desc`.

     Returns:
       The body structure with an extra `filename` attribute holding the
       mapped file name prefixed with HOST_ROOT.
     """
     body_offset = offset + self.header_size
     mmap_info = body_desc.Read(self.trace, body_offset)
     # The NUL-padded filename fills the rest of the event.
     raw_name = self.trace[body_offset + ctypes.sizeof(mmap_info):
                           offset + header.size]
     mmap_info.filename = HOST_ROOT + TraceReader._DecodeFilename(raw_name)
     return mmap_info

   @staticmethod
   def _DecodeFilename(raw_name):
     """Returns the text before the first NUL of `raw_name` as native str.

     When there is no NUL the whole input is used. The result is decoded
     because mmap slices are bytes on Python 3.
     """
     name = raw_name.split(b"\0", 1)[0]
     if not isinstance(name, str):
       name = name.decode("utf-8", "replace")
     return name

   def ReadSample(self, header, offset):
     """Reads a sample body; adds an `ips` list if callchains are recorded."""
     sample = self.sample_event_body_desc.Read(self.trace,
                                               offset + self.header_size)
     if not self.callchain_supported:
       return sample
     sample.ips = []
     # The callchain entries directly follow the fixed part of the body.
     offset += self.header_size + ctypes.sizeof(sample)
     for _ in range(sample.nr):
       sample.ips.append(
         self.ip_struct.from_buffer_copy(self.trace, offset).value)
       offset += self.ip_size
     return sample

   def Dispose(self):
     """Closes the memory map and the underlying file."""
     self.trace.close()
     self.trace_file.close()

   def _SampleEventBodyDesc(self, sample_type):
     """Returns a Descriptor with the fields enabled in `sample_type`."""
     assert (sample_type & PERF_SAMPLE_READ) == 0, \
            "Can't hande read format in samples"
     fields = [(name, kind)
               for (name, kind, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
               if (bit & sample_type) != 0]
     return Descriptor(fields)


 # First line of an "objdump -h" section entry; group 1 is the section name
 # including its leading dot.
 OBJDUMP_SECTION_HEADER_RE = re.compile(r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
 # Symbol table line; groups: address, 7 flag characters, section, size, name.
 OBJDUMP_SYMBOL_LINE_RE = re.compile(r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
 # Heading after which objdump lists the dynamic symbols.
 OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(r"^DYNAMIC SYMBOL TABLE")
 # Mapped files that are never passed to objdump.
 OBJDUMP_SKIP_RE = re.compile(r"^.*ld\.so\.cache$")
 # Kernel symbols are read from this file instead of running objdump.
 KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
 # Mapped file names that stand for the kernel symbol table.
 PERF_KERNEL_ALLSYMS_RE = re.compile(r".*kallsyms.*")
 # Line of KERNEL_ALLSYMS_FILE with symbol type "t" or "T"; groups: address,
 # name.
 KERNEL_ALLSYMS_LINE_RE = re.compile(r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")


 class LibraryRepo(object):
   """Registry of mapped files that feeds their symbols into a code map."""

   def __init__(self):
     self.infos = []     # mmap records registered by Load()
     self.names = set()  # unique display names already handed out
     self.ticks = {}

   @staticmethod
   def _ToStr(data):
     """Returns `data` as a native str, decoding it if it is bytes.

     Lines read from a subprocess pipe are bytes on Python 3 and cannot be
     matched against the str regular expressions used by this class.
     """
     if isinstance(data, str):
       return data
     return data.decode("utf-8", "replace")

   def HasDynamicSymbols(self, filename):
     """Returns True if "objdump -h" lists a .dynsym section for `filename`.

     Files ending in ".ko" are never inspected and yield False.

     Raises:
       AssertionError: if no such section was found and objdump failed.
     """
     if filename.endswith(".ko"): return False
     # An argument list (no shell) keeps file names containing spaces or
     # shell metacharacters intact.
     process = subprocess.Popen(
       [OBJDUMP_BIN, "-h", filename],
       stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     pipe = process.stdout
     found = False
     try:
       for raw_line in pipe:
         match = OBJDUMP_SECTION_HEADER_RE.match(LibraryRepo._ToStr(raw_line))
         # The regular expression captures the name with its leading dot.
         if match and match.group(1) == ".dynsym":
           found = True
           break
     finally:
       pipe.close()
     exit_code = process.wait()
     if found:
       # objdump may have been cut short by the closed pipe; its exit code
       # is meaningless in that case.
       return True
     assert exit_code == 0, "Failed to objdump -h %s" % filename
     return False

   def Load(self, mmap_info, code_map, options):
     """Registers a mapped file and adds its code symbols to `code_map`.

     Args:
       mmap_info: mmap event body with tid, addr and filename attributes;
         ticks and unique_name attributes are added to it.
       code_map: receives one Code object per symbol in a code section.
       options: parsed command line options; `options.kernel` is read.

     Returns:
       The result of the kernel symbol loader for kernel maps, else True.

     Raises:
       AssertionError: if objdump exits with a non-zero status.
     """
     # Skip kernel mmaps when requested using the fact that their tid
     # is 0.
     if mmap_info.tid == 0 and not options.kernel:
       return True
     if OBJDUMP_SKIP_RE.match(mmap_info.filename):
       return True
     if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
       return self._LoadKernelSymbols(code_map)
     self.infos.append(mmap_info)
     mmap_info.ticks = 0
     mmap_info.unique_name = self._UniqueMmapName(mmap_info)
     if not os.path.exists(mmap_info.filename):
       return True
     # Request section headers (-h), symbols (-t), and dynamic symbols
     # (-T, only when the file has them) from objdump.
     command = [OBJDUMP_BIN, "-h", "-t"]
     if self.HasDynamicSymbols(mmap_info.filename):
       command.append("-T")
     command += ["-C", mmap_info.filename]
     process = subprocess.Popen(
       command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     pipe = process.stdout
     # Unfortunately, section headers span two lines, so we have to
     # keep the just seen section name (from the first line in each
     # section header) in the after_section variable.
     after_section = None
     code_sections = set()
     reloc_sections = set()
     dynamic = False
     try:
       for raw_line in pipe:
         line = LibraryRepo._ToStr(raw_line)
         if after_section:
           # Second line of a section header: the section flags.
           if "CODE" in line:
             code_sections.add(after_section)
           if "RELOC" in line:
             reloc_sections.add(after_section)
           after_section = None
           continue

         match = OBJDUMP_SECTION_HEADER_RE.match(line)
         if match:
           after_section = match.group(1)
           continue

         if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
           dynamic = True
           continue

         match = OBJDUMP_SYMBOL_LINE_RE.match(line)
         if not match:
           continue
         section = match.group(3)
         if section not in code_sections:
           continue
         start_address = int(match.group(1), 16)
         origin_offset = start_address
         # Dynamic symbols and symbols of relocatable sections are relative
         # to the address the file was mapped at.
         if dynamic or section in reloc_sections:
           start_address += mmap_info.addr
         size = int(match.group(4), 16)
         code_map.Add(Code(match.group(5), start_address,
                           start_address + size,
                           mmap_info.filename, origin_offset))
     finally:
       pipe.close()
     assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
     return True

   def Tick(self, pc):
     """Accounts a sample at `pc` to the first mapped file containing it.

     The hit is swapped to the front of the list to speed up later lookups.

     Returns:
       True if some registered file contains `pc`.
     """
     for i, mmap_info in enumerate(self.infos):
       if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
         mmap_info.ticks += 1
         self.infos[0], self.infos[i] = mmap_info, self.infos[0]
         return True
     return False

   def _UniqueMmapName(self, mmap_info):
     """Returns the file name, suffixed with "-<n>" if it is already taken."""
     name = mmap_info.filename
     index = 1
     while name in self.names:
       name = "%s-%d" % (mmap_info.filename, index)
       index += 1
     self.names.add(name)
     return name

   def _LoadKernelSymbols(self, code_map):
     """Adds the text symbols listed in KERNEL_ALLSYMS_FILE to `code_map`.

     Returns:
       False (after a warning on stderr) if the file does not exist.
     """
     if not os.path.exists(KERNEL_ALLSYMS_FILE):
       print("Warning: %s not found" % KERNEL_ALLSYMS_FILE, file=sys.stderr)
       return False
     code = None
     with open(KERNEL_ALLSYMS_FILE, "r") as kallsyms:
       for line in kallsyms:
         match = KERNEL_ALLSYMS_LINE_RE.match(line)
         if not match:
           continue
         start_address = int(match.group(1), 16)
         if code:
           # A symbol's end is only known once the next symbol is seen, so
           # the previous symbol is added now. The last one is never added.
           code.end_address = start_address
           code_map.Add(code, 16)
         code = Code(match.group(2), start_address, start_address, "kernel", 0)
     return True


 def PrintReport(code_map, library_repo, arch, ticks, options):
   """Prints the flat profile: ticks per symbol, then ticks per library.

   Args:
     code_map: source of the used code objects.
     library_repo: its `infos` provide the per-library tick counts.
     arch: architecture name passed on when disassembly is requested.
     ticks: total number of samples; the base of all percentages.
     options: parsed options; `disasm_all` and `disasm_top` select the
       symbols that are additionally printed as annotated disassembly.
   """
   print("Ticks per symbol:")
   ranked_code = sorted(code_map.UsedCode(),
                        key=lambda c: c.self_ticks, reverse=True)
   for rank, code in enumerate(ranked_code):
     share = 100. * code.self_ticks / ticks
     print("%10d %5.1f%% %s [%s]" % (code.self_ticks, share,
                                     code.FullName(), code.origin))
     if options.disasm_all or rank < options.disasm_top:
       code.PrintAnnotated(arch, options)
   print()
   print("Ticks per library:")
   ticked_libraries = (m for m in library_repo.infos if m.ticks > 0)
   for library in sorted(ticked_libraries,
                         key=lambda m: m.ticks, reverse=True):
     share = 100. * library.ticks / ticks
     print("%10d %5.1f%% %s" % (library.ticks, share, library.unique_name))


 def PrintDot(code_map, options):
   """Prints the call graph of the hot code objects in Graphviz dot syntax.

   Only used code with at least 10 self ticks gets a node and outgoing
   edges; every edge is labelled with the number of caller/callee ticks.

   Args:
     code_map: source of the used code objects.
     options: unused; kept for a uniform printer signature.
   """
   print("digraph G {")
   for code in code_map.UsedCode():
     if code.self_ticks < 10:
       continue
     print("n%d [shape=box,label=\"%s\"];" % (code.id, code.name))
     if code.callee_ticks:
       # items() exists on both Python 2 and 3, unlike iteritems().
       for callee, ticks in code.callee_ticks.items():
         print("n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks))
   print("}")


 if __name__ == "__main__":
   # Script entry point: parse the options, replay the V8 code log and the
   # perf trace side by side, then print the report (or dot output) and the
   # summary statistics.
   parser = optparse.OptionParser(USAGE)
   parser.add_option("--log",
                     default="v8.log",
                     help="V8 log file name [default: %default]")
   parser.add_option("--trace",
                     default="perf.data",
                     help="perf trace file name [default: %default]")
   parser.add_option("--kernel",
                     default=False,
                     action="store_true",
                     help="process kernel entries [default: %default]")
   parser.add_option("--disasm-top",
                     default=0,
                     type="int",
                     help=("number of top symbols to disassemble and annotate "
                           "[default: %default]"))
   parser.add_option("--disasm-all",
                     default=False,
                     action="store_true",
                     help=("disassemble and annotate all used symbols "
                           "[default: %default]"))
   parser.add_option("--dot",
                     default=False,
                     action="store_true",
                     help="produce dot output (WIP) [default: %default]")
   parser.add_option("--quiet", "-q",
                     default=False,
                     action="store_true",
                     help="no auxiliary messages [default: %default]")
   parser.add_option("--gc-fake-mmap",
                     default="/tmp/__v8_gc__",
                     help="gc fake mmap file [default: %default]")
   parser.add_option("--objdump",
                     default="/usr/bin/objdump",
                     help="objdump tool to use [default: %default]")
   parser.add_option("--host-root",
                     default="",
                     help="Path to the host root [default: %default]")
   options, args = parser.parse_args()

   if not options.quiet:
     print("V8 log: %s, %s.ll" % (options.log, options.log))
     print("Perf trace file: %s" % options.trace)

   # HOST_ROOT and OBJDUMP_BIN are module-level names that TraceReader and
   # LibraryRepo read, so they must be defined on every path.
   V8_GC_FAKE_MMAP = options.gc_fake_mmap
   HOST_ROOT = options.host_root
   if os.path.exists(options.objdump):
     disasm.OBJDUMP_BIN = options.objdump
     OBJDUMP_BIN = options.objdump
   else:
     print("Cannot find %s, falling back to default objdump" % options.objdump)
     # Without this assignment LibraryRepo would fail with a NameError. Use
     # the disasm module's own default if it has one, else search the PATH.
     OBJDUMP_BIN = getattr(disasm, "OBJDUMP_BIN", "objdump")

   # Stats.
   events = 0
   ticks = 0
   missed_ticks = 0
   really_missed_ticks = 0
   optimized_ticks = 0
   generated_ticks = 0
   v8_internal_ticks = 0
   mmap_time = 0
   sample_time = 0

   # Initialize the log reader.
   code_map = CodeMap()
   log_reader = LogReader(log_name=options.log + ".ll",
                          code_map=code_map)
   if not options.quiet:
     print("Generated code architecture: %s" % log_reader.arch)
     print()
     sys.stdout.flush()

   # Process the code and trace logs.
   library_repo = LibraryRepo()
   log_reader.ReadUpToGC()
   trace_reader = TraceReader(options.trace)
   while True:
     header, offset = trace_reader.ReadEventHeader()
     if not header:
       break
     events += 1
     if header.type in (PERF_RECORD_MMAP, PERF_RECORD_MMAP2):
       start = time.time()
       if header.type == PERF_RECORD_MMAP:
         mmap_info = trace_reader.ReadMmap(header, offset)
       else:
         mmap_info = trace_reader.ReadMmap2(header, offset)
       if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
         # A mapping of the fake GC file marks a point in the trace at which
         # the code log has to be advanced to its next GC marker.
         log_reader.ReadUpToGC()
       else:
         library_repo.Load(mmap_info, code_map, options)
       mmap_time += time.time() - start
     elif header.type == PERF_RECORD_SAMPLE:
       ticks += 1
       start = time.time()
       sample = trace_reader.ReadSample(header, offset)
       code = code_map.Find(sample.ip)
       if code:
         code.Tick(sample.ip)
         if code.codetype == Code.OPTIMIZED:
           optimized_ticks += 1
         elif code.codetype == Code.FULL_CODEGEN:
           generated_ticks += 1
         elif code.codetype == Code.V8INTERNAL:
           v8_internal_ticks += 1
       else:
         missed_ticks += 1
       # library_repo.Tick() has to run for every sample because it also
       # maintains the per-library counters.
       if not library_repo.Tick(sample.ip) and not code:
         really_missed_ticks += 1
       if trace_reader.callchain_supported:
         # Walk the callchain and credit each resolved caller with a tick
         # for the closest resolved code found before it.
         for ip in sample.ips:
           caller_code = code_map.Find(ip)
           if caller_code:
             if code:
               caller_code.CalleeTick(code)
             code = caller_code
       sample_time += time.time() - start

   if options.dot:
     PrintDot(code_map, options)
   else:
     PrintReport(code_map, library_repo, log_reader.arch, ticks, options)

     if not options.quiet:
       def PrintTicks(number, total, description):
         # A trace without samples must not cause a division by zero.
         percent = 100.0 * number / total if total else 0.0
         # The descriptions already say what the ticks are, so the format
         # must not prepend "ticks in" a second time.
         print("%10d %5.1f%% %s" % (number, percent, description))
       print()
       print("Stats:")
       print("%10d total trace events" % events)
       print("%10d total ticks" % ticks)
       print("%10d ticks not in symbols" % missed_ticks)
       unaccounted = "unaccounted ticks"
       if really_missed_ticks > 0:
         unaccounted += " (probably in the kernel, try --kernel)"
       PrintTicks(really_missed_ticks, ticks, unaccounted)
       PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
       PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
       PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
       print("%10d total symbols" % sum(1 for _ in code_map.AllCode()))
       print("%10d used symbols" % sum(1 for _ in code_map.UsedCode()))
       print("%9.2fs library processing time" % mmap_time)
       print("%9.2fs tick processing time" % sample_time)

   log_reader.Dispose()
   trace_reader.Dispose()