| //===-- xray_function_call_trie.h ------------------------------*- C++ -*-===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file is a part of XRay, a dynamic runtime instrumentation system. |
| // |
| // This file defines the interface for a function call trie. |
| // |
| //===----------------------------------------------------------------------===// |
| #ifndef XRAY_FUNCTION_CALL_TRIE_H |
| #define XRAY_FUNCTION_CALL_TRIE_H |
| |
| #include "sanitizer_common/sanitizer_allocator_internal.h" |
| #include "xray_profiling_flags.h" |
| #include "xray_segmented_array.h" |
#include <new> // For placement new.
| #include <utility> |
| |
| namespace __xray { |
| |
/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function
/// and the path from the root to the node represents its stack trace. Each
/// node in the trie will contain some useful values, including:
| /// |
| /// * The cumulative amount of time spent in this particular node/stack. |
| /// * The number of times this stack has appeared. |
| /// * A histogram of latencies for that particular node. |
| /// |
/// Each node in the trie will also contain a list of callees, represented
/// using an Array<NodeIdPair> -- each NodeIdPair instance will contain the
/// function ID of the callee, and a pointer to the node.
| /// |
| /// If we visualise this data structure, we'll find the following potential |
| /// representation: |
| /// |
| /// [function id node] -> [callees] [cumulative time] |
| /// [call counter] [latency histogram] |
| /// |
| /// As an example, when we have a function in this pseudocode: |
| /// |
| /// func f(N) { |
| /// g() |
| /// h() |
| /// for i := 1..N { j() } |
| /// } |
| /// |
/// We may end up with a trie of the following form (assuming a single call
/// to f):
| /// |
| /// f -> [ g, h, j ] [...] [1] [...] |
| /// g -> [ ... ] [...] [1] [...] |
| /// h -> [ ... ] [...] [1] [...] |
| /// j -> [ ... ] [...] [N] [...] |
| /// |
| /// If for instance the function g() called j() like so: |
| /// |
| /// func g() { |
| /// for i := 1..10 { j() } |
| /// } |
| /// |
| /// We'll find the following updated trie: |
| /// |
| /// f -> [ g, h, j ] [...] [1] [...] |
| /// g -> [ j' ] [...] [1] [...] |
| /// h -> [ ... ] [...] [1] [...] |
| /// j -> [ ... ] [...] [N] [...] |
| /// j' -> [ ... ] [...] [10] [...] |
| /// |
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation would be to use a separate table for the paths, with hashes
/// of each path as the keys under which to accumulate the information. We've
/// moved away from that approach because computing the hash every time we
/// need to update a function's call information, while handling the entry and
/// exit events, is too expensive.
| /// |
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path. On function exit we can quickly compute the
/// amount of time elapsed since the entry, then update the counters for the
/// node already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact
/// and quick to update) to keep the overheads of this implementation to a
/// minimum.
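///
/// As a sketch of typical usage (the function IDs and TSC values below are
/// made-up placeholders; in the runtime they come from the XRay
/// instrumentation hooks and the processor's timestamp counter):
///
///   auto A = FunctionCallTrie::InitAllocators();
///   FunctionCallTrie Trie(A);
///   Trie.enterFunction(1, 100); // Enter function 1 at TSC 100.
///   Trie.enterFunction(2, 120); // Enter function 2, called by function 1.
///   Trie.exitFunction(2, 170);  // Function 2's node accumulates 50 ticks.
///   Trie.exitFunction(1, 200);  // Function 1's node accumulates 100 ticks.
///   for (const auto Root : Trie.getRoots()) {
///     // Root->FId == 1, and Root->Callees holds the node for function 2.
///   }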
| class FunctionCallTrie { |
| public: |
| struct Node; |
| |
  // We use a NodeIdPair type instead of a std::pair<...> so that we don't
  // rely on standard library types in this header.
| struct NodeIdPair { |
| Node *NodePtr; |
| int32_t FId; |
| |
    // Constructor for in-place construction.
| NodeIdPair(Node *N, int32_t F) : NodePtr(N), FId(F) {} |
| }; |
| |
| using NodeIdPairArray = Array<NodeIdPair>; |
| using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType; |
| |
  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
  // number of times this node actually appeared (CallCount), and the
  // cumulative amount of time attributed to this particular node
  // (CumulativeLocalTime). Each Node will have the ID of the XRay
  // instrumented function that it is associated with.
| struct Node { |
| Node *Parent; |
| NodeIdPairArray Callees; |
| int64_t CallCount; |
| int64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time. |
| int32_t FId; |
| |
    // We add a constructor here to allow us to construct the Node in place
    // through Array<...>'s AppendEmplace.
| Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT, |
| int32_t F) |
| : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT), |
| FId(F) {} |
| |
| // TODO: Include the compact histogram. |
| }; |
| |
| private: |
| struct ShadowStackEntry { |
| uint64_t EntryTSC; |
| Node *NodePtr; |
| |
    // We add a constructor here to allow us to construct entries in place
    // through Array<...>'s AppendEmplace.
| ShadowStackEntry(uint64_t T, Node *N) : EntryTSC{T}, NodePtr{N} {} |
| }; |
| |
| using NodeArray = Array<Node>; |
| using RootArray = Array<Node *>; |
| using ShadowStackArray = Array<ShadowStackEntry>; |
| |
| public: |
| // We collate the allocators we need into a single struct, as a convenience to |
| // allow us to initialize these as a group. |
| struct Allocators { |
| using NodeAllocatorType = NodeArray::AllocatorType; |
| using RootAllocatorType = RootArray::AllocatorType; |
| using ShadowStackAllocatorType = ShadowStackArray::AllocatorType; |
| |
| NodeAllocatorType *NodeAllocator = nullptr; |
| RootAllocatorType *RootAllocator = nullptr; |
| ShadowStackAllocatorType *ShadowStackAllocator = nullptr; |
| NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr; |
| |
    Allocators() = default;
| Allocators(const Allocators &) = delete; |
| Allocators &operator=(const Allocators &) = delete; |
| |
| Allocators(Allocators &&O) |
| : NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator), |
| ShadowStackAllocator(O.ShadowStackAllocator), |
| NodeIdPairAllocator(O.NodeIdPairAllocator) { |
| O.NodeAllocator = nullptr; |
| O.RootAllocator = nullptr; |
| O.ShadowStackAllocator = nullptr; |
| O.NodeIdPairAllocator = nullptr; |
| } |
| |
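    // Note that this move-assignment is implemented as a swap: the moved-from
    // object takes ownership of our previous allocators, and will free them
    // when it is destroyed.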
| Allocators &operator=(Allocators &&O) { |
| { |
| auto Tmp = O.NodeAllocator; |
| O.NodeAllocator = this->NodeAllocator; |
| this->NodeAllocator = Tmp; |
| } |
| { |
| auto Tmp = O.RootAllocator; |
| O.RootAllocator = this->RootAllocator; |
| this->RootAllocator = Tmp; |
| } |
| { |
| auto Tmp = O.ShadowStackAllocator; |
| O.ShadowStackAllocator = this->ShadowStackAllocator; |
| this->ShadowStackAllocator = Tmp; |
| } |
| { |
| auto Tmp = O.NodeIdPairAllocator; |
| O.NodeIdPairAllocator = this->NodeIdPairAllocator; |
| this->NodeIdPairAllocator = Tmp; |
| } |
| return *this; |
| } |
| |
| ~Allocators() { |
      // Note that we cannot use `delete` on these pointers; they were
      // obtained through InternalAlloc, so we invoke the destructors manually
      // and return the memory to the sanitizer_common library's internal
      // allocator via InternalFree.
| if (NodeAllocator != nullptr) { |
| NodeAllocator->~NodeAllocatorType(); |
| InternalFree(NodeAllocator); |
| NodeAllocator = nullptr; |
| } |
| if (RootAllocator != nullptr) { |
| RootAllocator->~RootAllocatorType(); |
| InternalFree(RootAllocator); |
| RootAllocator = nullptr; |
| } |
| if (ShadowStackAllocator != nullptr) { |
| ShadowStackAllocator->~ShadowStackAllocatorType(); |
| InternalFree(ShadowStackAllocator); |
| ShadowStackAllocator = nullptr; |
| } |
| if (NodeIdPairAllocator != nullptr) { |
| NodeIdPairAllocator->~NodeIdPairAllocatorType(); |
| InternalFree(NodeIdPairAllocator); |
| NodeIdPairAllocator = nullptr; |
| } |
| } |
| }; |
| |
| // TODO: Support configuration of options through the arguments. |
| static Allocators InitAllocators() { |
| return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max); |
| } |
| |
| static Allocators InitAllocatorsCustom(uptr Max) { |
| Allocators A; |
| auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>( |
| InternalAlloc(sizeof(Allocators::NodeAllocatorType))); |
| new (NodeAllocator) Allocators::NodeAllocatorType(Max); |
| A.NodeAllocator = NodeAllocator; |
| |
| auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>( |
| InternalAlloc(sizeof(Allocators::RootAllocatorType))); |
| new (RootAllocator) Allocators::RootAllocatorType(Max); |
| A.RootAllocator = RootAllocator; |
| |
| auto ShadowStackAllocator = |
| reinterpret_cast<Allocators::ShadowStackAllocatorType *>( |
| InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType))); |
| new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max); |
| A.ShadowStackAllocator = ShadowStackAllocator; |
| |
| auto NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( |
| InternalAlloc(sizeof(NodeIdPairAllocatorType))); |
| new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max); |
| A.NodeIdPairAllocator = NodeIdPairAllocator; |
| return A; |
| } |
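
  // A sketch of how these factory functions are expected to be used (the max
  // value here is an arbitrary example). Note that the returned Allocators
  // object must outlive every FunctionCallTrie constructed from it, since
  // tries only borrow the allocators:
  //
  //   auto A = FunctionCallTrie::InitAllocatorsCustom(1 << 20);
  //   {
  //     FunctionCallTrie T(A); // T references A's allocators.
  //     // ... record entry and exit events ...
  //   } // T is gone; A still owns the buffers until ~Allocators() runs.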
| |
| private: |
| NodeArray Nodes; |
| RootArray Roots; |
| ShadowStackArray ShadowStack; |
| NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr; |
| |
| public: |
| explicit FunctionCallTrie(const Allocators &A) |
| : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator), |
| ShadowStack(*A.ShadowStackAllocator), |
| NodeIdPairAllocator(A.NodeIdPairAllocator) {} |
| |
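  // To make the bookkeeping below concrete, here is an illustration with
  // made-up function IDs and TSC values: after enterFunction(1, 100) followed
  // by enterFunction(2, 120), the ShadowStack holds {100, node-for-1} below
  // {120, node-for-2}, and node-for-1's Callees contains a NodeIdPair
  // referring to node-for-2. A later enterFunction(2, ...) while node-for-1
  // is on top of the stack re-uses node-for-2 instead of creating a new node.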
| void enterFunction(const int32_t FId, uint64_t TSC) { |
| DCHECK_NE(FId, 0); |
| // This function primarily deals with ensuring that the ShadowStack is |
| // consistent and ready for when an exit event is encountered. |
| if (UNLIKELY(ShadowStack.empty())) { |
| auto NewRoot = |
| Nodes.AppendEmplace(nullptr, *NodeIdPairAllocator, 0, 0, FId); |
| if (UNLIKELY(NewRoot == nullptr)) |
| return; |
| Roots.Append(NewRoot); |
| ShadowStack.AppendEmplace(TSC, NewRoot); |
| return; |
| } |
| |
| auto &Top = ShadowStack.back(); |
| auto TopNode = Top.NodePtr; |
| DCHECK_NE(TopNode, nullptr); |
| |
    // If we've seen this callee before, we just access that node and push it
    // onto the top of the shadow stack.
| auto Callee = TopNode->Callees.find_element( |
| [FId](const NodeIdPair &NR) { return NR.FId == FId; }); |
| if (Callee != nullptr) { |
| CHECK_NE(Callee->NodePtr, nullptr); |
| ShadowStack.AppendEmplace(TSC, Callee->NodePtr); |
| return; |
| } |
| |
    // This means we've never seen this stack before; create a new node here.
    auto NewNode =
        Nodes.AppendEmplace(TopNode, *NodeIdPairAllocator, 0, 0, FId);
    if (UNLIKELY(NewNode == nullptr))
      return;
| TopNode->Callees.AppendEmplace(NewNode, FId); |
| ShadowStack.AppendEmplace(TSC, NewNode); |
| DCHECK_NE(ShadowStack.back().NodePtr, nullptr); |
| return; |
| } |
| |
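  // To make the accounting below concrete, here is a worked example with
  // made-up values: say the ShadowStack holds function 1 entered at TSC 100
  // and function 2 entered at TSC 120, and we only ever see
  // exitFunction(1, 200) because function 2's exit event was lost. The loop
  // pops function 2 first: LocalTime = 80, so its node accumulates 80 and
  // CumulativeTreeTime becomes 80. It then pops function 1: LocalTime = 100,
  // of which only 100 - 80 = 20 is accumulated, i.e. the portion not already
  // attributed to function 2 in this unwind. On a matched exit only a single
  // frame is popped, so a node's CumulativeLocalTime includes the time spent
  // in callees that exited normally.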
| void exitFunction(int32_t FId, uint64_t TSC) { |
    // When we exit a function, we unwind the ShadowStack until we find the
    // entry for this function, accounting along the way for any frames whose
    // exit events we may have missed. We do as little processing here as we
    // can, since most of the hard work would have already been done at
    // function entry.
| uint64_t CumulativeTreeTime = 0; |
| while (!ShadowStack.empty()) { |
| const auto &Top = ShadowStack.back(); |
| auto TopNode = Top.NodePtr; |
| DCHECK_NE(TopNode, nullptr); |
| auto LocalTime = TSC - Top.EntryTSC; |
| TopNode->CallCount++; |
| TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime; |
| CumulativeTreeTime += LocalTime; |
| ShadowStack.trim(1); |
| |
| // TODO: Update the histogram for the node. |
| if (TopNode->FId == FId) |
| break; |
| } |
| } |
| |
| const RootArray &getRoots() const { return Roots; } |
| |
  // The deepCopyInto operation will update the provided FunctionCallTrie by
  // re-creating the contents of this particular FunctionCallTrie in the other
  // FunctionCallTrie. It does this with a depth-first traversal from the
  // roots, re-creating each visited node in the provided FunctionCallTrie.
  //
  // This operation will *not* destroy any existing state in |O|, so calling
  // it on a non-empty trie would leave duplicate entries behind; |O| must
  // therefore be empty (this is DCHECK'd below).
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
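  //
  // A sketch of the intended use, e.g. snapshotting a thread's trie at
  // profile collection time (the names here are illustrative, not part of
  // this interface):
  //
  //   FunctionCallTrie::Allocators SnapshotAllocators =
  //       FunctionCallTrie::InitAllocators();
  //   FunctionCallTrie Snapshot(SnapshotAllocators);
  //   ThreadLocalTrie.deepCopyInto(Snapshot); // Snapshot starts out empty.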
| void deepCopyInto(FunctionCallTrie &O) const { |
| DCHECK(O.getRoots().empty()); |
| |
    // For each root, we push a pair of (original node, newly created copy)
    // onto a stack, which we use to keep track of the parent for new nodes
    // as we traverse depth-first down the call tree.
| struct NodeAndParent { |
| FunctionCallTrie::Node *Node; |
| FunctionCallTrie::Node *NewNode; |
| }; |
| using Stack = Array<NodeAndParent>; |
| |
| typename Stack::AllocatorType StackAllocator( |
| profilingFlags()->stack_allocator_max); |
| Stack DFSStack(StackAllocator); |
| |
| for (const auto Root : getRoots()) { |
| // Add a node in O for this root. |
| auto NewRoot = O.Nodes.AppendEmplace( |
| nullptr, *O.NodeIdPairAllocator, Root->CallCount, |
| Root->CumulativeLocalTime, Root->FId); |
| |
      // If we fail to allocate a new root we cannot proceed, so bail out
      // right away.
| if (UNLIKELY(NewRoot == nullptr)) |
| return; |
| |
| O.Roots.Append(NewRoot); |
| |
| // TODO: Figure out what to do if we fail to allocate any more stack |
| // space. Maybe warn or report once? |
| DFSStack.AppendEmplace(Root, NewRoot); |
| while (!DFSStack.empty()) { |
| NodeAndParent NP = DFSStack.back(); |
| DCHECK_NE(NP.Node, nullptr); |
| DCHECK_NE(NP.NewNode, nullptr); |
| DFSStack.trim(1); |
| for (const auto Callee : NP.Node->Callees) { |
| auto NewNode = O.Nodes.AppendEmplace( |
| NP.NewNode, *O.NodeIdPairAllocator, Callee.NodePtr->CallCount, |
| Callee.NodePtr->CumulativeLocalTime, Callee.FId); |
| if (UNLIKELY(NewNode == nullptr)) |
| return; |
| NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId); |
| DFSStack.AppendEmplace(Callee.NodePtr, NewNode); |
| } |
| } |
| } |
| } |
| |
  // The mergeInto operation will update the provided FunctionCallTrie by
  // traversing the current trie's roots and merging this trie's data into the
  // corresponding nodes in |O|. If a node doesn't exist in |O|, we create it
  // in the right position with zeroed counters, then accumulate the data from
  // the original (current) trie into it, along with all its callees.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
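  //
  // A sketch of the intended use, e.g. aggregating per-thread tries into a
  // single profile (the names here are illustrative, not part of this
  // interface):
  //
  //   FunctionCallTrie::Allocators AggregateAllocators =
  //       FunctionCallTrie::InitAllocators();
  //   FunctionCallTrie Aggregate(AggregateAllocators);
  //   for (const auto &ThreadTrie : AllThreadTries)
  //     ThreadTrie.mergeInto(Aggregate);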
| void mergeInto(FunctionCallTrie &O) const { |
| struct NodeAndTarget { |
| FunctionCallTrie::Node *OrigNode; |
| FunctionCallTrie::Node *TargetNode; |
| }; |
| using Stack = Array<NodeAndTarget>; |
| typename Stack::AllocatorType StackAllocator( |
| profilingFlags()->stack_allocator_max); |
| Stack DFSStack(StackAllocator); |
| |
| for (const auto Root : getRoots()) { |
| Node *TargetRoot = nullptr; |
| auto R = O.Roots.find_element( |
| [&](const Node *Node) { return Node->FId == Root->FId; }); |
| if (R == nullptr) { |
| TargetRoot = O.Nodes.AppendEmplace(nullptr, *O.NodeIdPairAllocator, 0, |
| 0, Root->FId); |
| if (UNLIKELY(TargetRoot == nullptr)) |
| return; |
| |
| O.Roots.Append(TargetRoot); |
| } else { |
| TargetRoot = *R; |
| } |
| |
| DFSStack.Append(NodeAndTarget{Root, TargetRoot}); |
| while (!DFSStack.empty()) { |
| NodeAndTarget NT = DFSStack.back(); |
| DCHECK_NE(NT.OrigNode, nullptr); |
| DCHECK_NE(NT.TargetNode, nullptr); |
| DFSStack.trim(1); |
| // TODO: Update the histogram as well when we have it ready. |
| NT.TargetNode->CallCount += NT.OrigNode->CallCount; |
| NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime; |
| for (const auto Callee : NT.OrigNode->Callees) { |
| auto TargetCallee = NT.TargetNode->Callees.find_element( |
| [&](const FunctionCallTrie::NodeIdPair &C) { |
| return C.FId == Callee.FId; |
| }); |
| if (TargetCallee == nullptr) { |
| auto NewTargetNode = O.Nodes.AppendEmplace( |
| NT.TargetNode, *O.NodeIdPairAllocator, 0, 0, Callee.FId); |
| |
| if (UNLIKELY(NewTargetNode == nullptr)) |
| return; |
| |
| TargetCallee = |
| NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId); |
| } |
| DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr); |
| } |
| } |
| } |
| } |
| }; |
| |
| } // namespace __xray |
| |
| #endif // XRAY_FUNCTION_CALL_TRIE_H |