//===-- xray_profile_collector.cc ------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This implements the interface for the profileCollectorService.
//
//===----------------------------------------------------------------------===//
| #include "xray_profile_collector.h" |
| #include "sanitizer_common/sanitizer_allocator_internal.h" |
| #include "sanitizer_common/sanitizer_common.h" |
| #include "sanitizer_common/sanitizer_vector.h" |
| #include "xray_profiling_flags.h" |
| #include <memory> |
| #include <pthread.h> |
| #include <utility> |

namespace __xray {
namespace profileCollectorService {

namespace {

SpinMutex GlobalMutex;
struct ThreadTrie {
  tid_t TId;
  FunctionCallTrie *Trie;
};

struct ProfileBuffer {
  void *Data;
  size_t Size;
};

// Current version of the profile format.
constexpr u64 XRayProfilingVersion = 0x20180424;

// Identifier for XRay profiling files: 'xrayprof' in hex.
constexpr u64 XRayMagicBytes = 0x7872617970726f66;

struct XRayProfilingFileHeader {
  const u64 MagicBytes = XRayMagicBytes;
  const u64 Version = XRayProfilingVersion;
  u64 Timestamp = 0; // System time in nanoseconds.
  u64 PID = 0;       // Process ID.
};

struct BlockHeader {
  u32 BlockSize;
  u32 BlockNum;
  u64 ThreadId;
};
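
// Consumers obtain the serialized profile buffer-by-buffer through
// nextBuffer() (defined below): first an XRayProfilingFileHeader, then one
// block per thread, each of which is a BlockHeader followed by that thread's
// serialized profile records (see serializeRecords() below). A sketch of the
// overall stream, assembled by concatenating the handed-out buffers:
//
//   | XRayProfilingFileHeader (32 bytes)            |
//   | block 0: BlockHeader (16 bytes) | records ... |
//   | block 1: BlockHeader (16 bytes) | records ... |
//   | ...                                           |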

// These need to be pointers to heap- or internal-allocator-allocated objects
// because they are accessed even at program exit.
Vector<ThreadTrie> *ThreadTries = nullptr;
Vector<ProfileBuffer> *ProfileBuffers = nullptr;
FunctionCallTrie::Allocators *GlobalAllocators = nullptr;

} // namespace

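// Posts a deep copy of the given thread-local FunctionCallTrie, associated
// with thread TId, into the global collector state. An illustrative call
// site (a sketch only; getThreadLocalTrie() is a hypothetical accessor for
// the calling thread's trie, while GetTid() is sanitizer_common's):
//
//   profileCollectorService::post(getThreadLocalTrie(), GetTid());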
void post(const FunctionCallTrie &T, tid_t TId) {
  static pthread_once_t Once = PTHREAD_ONCE_INIT;
  pthread_once(&Once, +[] {
    SpinMutexLock Lock(&GlobalMutex);
    GlobalAllocators = reinterpret_cast<FunctionCallTrie::Allocators *>(
        InternalAlloc(sizeof(FunctionCallTrie::Allocators)));
    new (GlobalAllocators) FunctionCallTrie::Allocators();
    *GlobalAllocators = FunctionCallTrie::InitAllocatorsCustom(
        profilingFlags()->global_allocator_max);
    ThreadTries = reinterpret_cast<Vector<ThreadTrie> *>(
        InternalAlloc(sizeof(Vector<ThreadTrie>)));
    new (ThreadTries) Vector<ThreadTrie>();
    ProfileBuffers = reinterpret_cast<Vector<ProfileBuffer> *>(
        InternalAlloc(sizeof(Vector<ProfileBuffer>)));
    new (ProfileBuffers) Vector<ProfileBuffer>();
  });
  DCHECK_NE(GlobalAllocators, nullptr);
  DCHECK_NE(ThreadTries, nullptr);
  DCHECK_NE(ProfileBuffers, nullptr);

  ThreadTrie *Item = nullptr;
  {
    SpinMutexLock Lock(&GlobalMutex);
    if (GlobalAllocators == nullptr)
      return;

    Item = ThreadTries->PushBack();
    Item->TId = TId;

    // Here we use the internal allocator instead of the managed allocator
    // because:
    //
    // 1) We're not using the segmented array data structure to host
    //    FunctionCallTrie objects; we're using a Vector (from
    //    sanitizer_common) which works like a std::vector<...>, keeping
    //    elements contiguous in memory. The segmented array data structure
    //    assumes that elements are trivially destructible, whereas
    //    FunctionCallTrie isn't.
    //
    // 2) Using a managed allocator means we'd have to manage its lifetime
    //    separately, which complicates this code. To get around that, we use
    //    the internal allocator instead, which has its own global state and
    //    is decoupled from the lifetime management required by XRay's managed
    //    allocator.
    //
    Item->Trie = reinterpret_cast<FunctionCallTrie *>(InternalAlloc(
        sizeof(FunctionCallTrie), nullptr, alignof(FunctionCallTrie)));
    DCHECK_NE(Item->Trie, nullptr);
    new (Item->Trie) FunctionCallTrie(*GlobalAllocators);
  }

  T.deepCopyInto(*Item->Trie);
}

// A PathArray represents the function ids of a stack trace. In this context a
// path is almost always represented from the leaf function in a call stack to
// a root of the call trie.
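//
// For example, for a call chain main() -> f() -> g(), the path stored for
// the node representing g would be {FId(g), FId(f), FId(main)}, leaf first
// (see populateRecords() below, which walks Parent pointers upward).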
using PathArray = Array<int32_t>;

struct ProfileRecord {
  using PathAllocator = typename PathArray::AllocatorType;

  // The Path in this record is the sequence of function ids from the leaf to
  // the root of the function call stack as represented by a FunctionCallTrie.
  PathArray *Path = nullptr;
  const FunctionCallTrie::Node *Node = nullptr;

  // Constructor for in-place construction.
  ProfileRecord(PathAllocator &A, const FunctionCallTrie::Node *N)
      : Path([&] {
          auto P =
              reinterpret_cast<PathArray *>(InternalAlloc(sizeof(PathArray)));
          new (P) PathArray(A);
          return P;
        }()),
        Node(N) {}
};

namespace {

using ProfileRecordArray = Array<ProfileRecord>;

// Does a depth-first traversal from each root of the FunctionCallTrie to
// generate the path(s) and the data associated with each path.
static void populateRecords(ProfileRecordArray &PRs,
                            ProfileRecord::PathAllocator &PA,
                            const FunctionCallTrie &Trie) {
  using StackArray = Array<const FunctionCallTrie::Node *>;
  using StackAllocator = typename StackArray::AllocatorType;
  StackAllocator StackAlloc(profilingFlags()->stack_allocator_max);
  StackArray DFSStack(StackAlloc);
  for (const auto R : Trie.getRoots()) {
    DFSStack.Append(R);
    while (!DFSStack.empty()) {
      auto Node = DFSStack.back();
      DFSStack.trim(1);
      auto Record = PRs.AppendEmplace(PA, Node);
      if (Record == nullptr)
        return;

      // Traverse the Node's parents, collecting the FIds in the order they
      // appear.
      for (auto N = Node; N != nullptr; N = N->Parent)
        Record->Path->Append(N->FId);
      DCHECK(!Record->Path->empty());

      for (const auto C : Node->Callees)
        DFSStack.Append(C.NodePtr);
    }
  }
}

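// Writes Header followed by each record in ProfileRecords into Buffer->Data.
// Each serialized record is laid out as:
//
//   int32 x N : the path function ids, leaf to root
//   int32     : sentinel (0) terminating the path
//   u64       : Node->CallCount
//   u64       : Node->CumulativeLocalTime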
static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header,
                             const ProfileRecordArray &ProfileRecords) {
  auto NextPtr = static_cast<char *>(
                     internal_memcpy(Buffer->Data, &Header, sizeof(Header))) +
                 sizeof(Header);
  for (const auto &Record : ProfileRecords) {
    // The list of path ids follows:
    for (const auto FId : *Record.Path)
      NextPtr =
          static_cast<char *>(internal_memcpy(NextPtr, &FId, sizeof(FId))) +
          sizeof(FId);

    // Add the sentinel here.
    constexpr int32_t SentinelFId = 0;
    NextPtr = static_cast<char *>(
                  internal_memset(NextPtr, SentinelFId, sizeof(SentinelFId))) +
              sizeof(SentinelFId);

    // Add the node data here.
    NextPtr =
        static_cast<char *>(internal_memcpy(NextPtr, &Record.Node->CallCount,
                                            sizeof(Record.Node->CallCount))) +
        sizeof(Record.Node->CallCount);
    NextPtr = static_cast<char *>(
                  internal_memcpy(NextPtr, &Record.Node->CumulativeLocalTime,
                                  sizeof(Record.Node->CumulativeLocalTime))) +
              sizeof(Record.Node->CumulativeLocalTime);
  }

  DCHECK_EQ(NextPtr - static_cast<char *>(Buffer->Data), Buffer->Size);
}

} // namespace

void serialize() {
  SpinMutexLock Lock(&GlobalMutex);

  // Clear out the global ProfileBuffers.
  for (uptr I = 0; I < ProfileBuffers->Size(); ++I)
    InternalFree((*ProfileBuffers)[I].Data);
  ProfileBuffers->Reset();

  if (ThreadTries->Size() == 0)
    return;

  // Then repopulate the global ProfileBuffers.
  for (u32 I = 0; I < ThreadTries->Size(); ++I) {
    using ProfileRecordAllocator = typename ProfileRecordArray::AllocatorType;
    ProfileRecordAllocator PRAlloc(profilingFlags()->global_allocator_max);
    ProfileRecord::PathAllocator PathAlloc(
        profilingFlags()->global_allocator_max);
    ProfileRecordArray ProfileRecords(PRAlloc);

    // First, we want to compute the amount of space we're going to need. We
    // use a local allocator and an __xray::Array<...> to store the
    // intermediary data, computing the size as we go. Then we allocate the
    // contiguous space to contain the thread buffer data.
    const auto &Trie = *(*ThreadTries)[I].Trie;
    if (Trie.getRoots().empty())
      continue;
    populateRecords(ProfileRecords, PathAlloc, Trie);
    DCHECK(!Trie.getRoots().empty());
    DCHECK(!ProfileRecords.empty());

    // Go through each record to compute the sizes.
    //
    // header size = block size (4 bytes)
    //   + block number (4 bytes)
    //   + thread id (8 bytes)
    // record size = path ids (4 bytes * number of ids + sentinel 4 bytes)
    //   + call count (8 bytes)
    //   + local time (8 bytes)
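    //
    // For example, a record whose path contains 3 function ids occupies
    // 20 + (4 * 3) = 32 bytes of the block.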
    u32 CumulativeSizes = 0;
    for (const auto &Record : ProfileRecords)
      CumulativeSizes += 20 + (4 * Record.Path->size());

    BlockHeader Header{16 + CumulativeSizes, I, (*ThreadTries)[I].TId};
    auto Buffer = ProfileBuffers->PushBack();
    Buffer->Size = sizeof(Header) + CumulativeSizes;
    Buffer->Data = InternalAlloc(Buffer->Size, nullptr, 64);
    DCHECK_NE(Buffer->Data, nullptr);
    serializeRecords(Buffer, Header, ProfileRecords);

    // Now clean up the ProfileRecords array, one at a time.
    for (auto &Record : ProfileRecords) {
      Record.Path->~PathArray();
      InternalFree(Record.Path);
    }
  }
}

void reset() {
  SpinMutexLock Lock(&GlobalMutex);
  if (ProfileBuffers != nullptr) {
    // Clear out the profile buffers that have been serialized.
    for (uptr I = 0; I < ProfileBuffers->Size(); ++I)
      InternalFree((*ProfileBuffers)[I].Data);
    ProfileBuffers->Reset();
    InternalFree(ProfileBuffers);
    ProfileBuffers = nullptr;
  }

  if (ThreadTries != nullptr) {
    // Clear out the function call tries per thread.
    for (uptr I = 0; I < ThreadTries->Size(); ++I) {
      auto &T = (*ThreadTries)[I];
      T.Trie->~FunctionCallTrie();
      InternalFree(T.Trie);
    }
    ThreadTries->Reset();
    InternalFree(ThreadTries);
    ThreadTries = nullptr;
  }

  // Reset the global allocators.
  if (GlobalAllocators != nullptr) {
    GlobalAllocators->~Allocators();
    InternalFree(GlobalAllocators);
    GlobalAllocators = nullptr;
  }
  GlobalAllocators = reinterpret_cast<FunctionCallTrie::Allocators *>(
      InternalAlloc(sizeof(FunctionCallTrie::Allocators)));
  new (GlobalAllocators) FunctionCallTrie::Allocators();
  *GlobalAllocators = FunctionCallTrie::InitAllocators();
  ThreadTries = reinterpret_cast<Vector<ThreadTrie> *>(
      InternalAlloc(sizeof(Vector<ThreadTrie>)));
  new (ThreadTries) Vector<ThreadTrie>();
  ProfileBuffers = reinterpret_cast<Vector<ProfileBuffer> *>(
      InternalAlloc(sizeof(Vector<ProfileBuffer>)));
  new (ProfileBuffers) Vector<ProfileBuffer>();
}

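// Hands out the serialized buffers one at a time: passing {nullptr, 0} yields
// the file header; passing the previously returned buffer yields the next
// block, until {nullptr, 0} signals the end. An illustrative consumer loop
// (a sketch only; consumeBuffer() is a hypothetical sink):
//
//   auto B = profileCollectorService::nextBuffer({nullptr, 0});
//   while (B.Data != nullptr) {
//     consumeBuffer(B.Data, B.Size);
//     B = profileCollectorService::nextBuffer(B);
//   }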
XRayBuffer nextBuffer(XRayBuffer B) {
  SpinMutexLock Lock(&GlobalMutex);

  if (ProfileBuffers == nullptr || ProfileBuffers->Size() == 0)
    return {nullptr, 0};

  static pthread_once_t Once = PTHREAD_ONCE_INIT;
  static typename std::aligned_storage<sizeof(XRayProfilingFileHeader)>::type
      FileHeaderStorage;
  pthread_once(&Once,
               +[] { new (&FileHeaderStorage) XRayProfilingFileHeader{}; });

  if (UNLIKELY(B.Data == nullptr)) {
    // The first buffer should always contain the file header information.
    auto &FileHeader =
        *reinterpret_cast<XRayProfilingFileHeader *>(&FileHeaderStorage);
    FileHeader.Timestamp = NanoTime();
    FileHeader.PID = internal_getpid();
    return {&FileHeaderStorage, sizeof(XRayProfilingFileHeader)};
  }

  if (UNLIKELY(B.Data == &FileHeaderStorage))
    return {(*ProfileBuffers)[0].Data, (*ProfileBuffers)[0].Size};

  BlockHeader Header;
  internal_memcpy(&Header, B.Data, sizeof(BlockHeader));
  auto NextBlock = Header.BlockNum + 1;
  if (NextBlock < ProfileBuffers->Size())
    return {(*ProfileBuffers)[NextBlock].Data,
            (*ProfileBuffers)[NextBlock].Size};
  return {nullptr, 0};
}

} // namespace profileCollectorService
} // namespace __xray