Files
clang-p2996/compiler-rt/lib/xray/tests/unit/function_call_trie_test.cc
Dean Michael Berris 980d93d0e0 [XRay][profiler] Part 2: XRay Function Call Trie
Summary:
This is part of the larger XRay Profiling Mode effort.

This patch implements a central data structure for capturing statistics
about XRay instrumented function call stacks. The `FunctionCallTrie`
type does the following things:

*  It keeps track of a shadow function call stack of XRay instrumented
   functions as they are entered (function enter event) and as they are
   exited (function exit event).

*  When a function is entered, the shadow stack contains information
   about the entry TSC, and updates the trie (or prefix tree)
   representing the current function call stack. If we haven't
   encountered this function call before, this creates a unique node for
   the function in this position on the stack. We update the list of
   callees of the parent function as well to reflect this newly found
   path.

*  When a function is exited, we compute statistics (TSC deltas,
   function call count frequency) for the associated function(s) up the
   stack as we unwind to find the matching entry event.

This builds upon the XRay `Allocator` and `Array` types in Part 1 of
this series of patches.

Depends on D45756.

Reviewers: echristo, pelikan, kpw

Reviewed By: kpw

Subscribers: llvm-commits, mgorny

Differential Revision: https://reviews.llvm.org/D45757

llvm-svn: 332313
2018-05-15 00:42:36 +00:00

254 lines
7.4 KiB
C++

//===-- function_call_trie_test.cc ----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a function call tracing system.
//
//===----------------------------------------------------------------------===//
#include "gtest/gtest.h"
#include "xray_function_call_trie.h"
namespace __xray {
namespace {
// Smoke test: a FunctionCallTrie can be default-constructed, i.e. without the
// caller providing a set of allocators.
TEST(FunctionCallTrieTest, Construction) {
  // No allocators are passed in; default construction is expected to fall back
  // to the global allocators.
  FunctionCallTrie DefaultTrie;
}
// Smoke test: a FunctionCallTrie can be constructed on top of an explicitly
// created set of allocators.
TEST(FunctionCallTrieTest, ConstructWithTLSAllocators) {
  // FIXME: Support passing in configuration for allocators in the allocator
  // constructors.
  profilerFlags()->setDefaults();
  auto TrieAllocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(TrieAllocators);
}
// A single matched enter/exit pair must produce exactly one root node carrying
// the statistics for that one call.
TEST(FunctionCallTrieTest, EnterAndExitFunction) {
  profilerFlags()->setDefaults();
  auto Allocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocators);

  // Function 1 is entered at TSC 1 and exited at TSC 2.
  Trie.enterFunction(1, 1);
  Trie.exitFunction(1, 2);

  // Until a data collection service exists, the trie exposes its data as a
  // list of roots, and the test walks that structure by hand.
  const auto &Roots = Trie.getRoots();
  ASSERT_EQ(Roots.size(), 1u);

  // One call, with a local time of (2 - 1) = 1.
  const auto &OnlyRoot = *Roots.front();
  ASSERT_EQ(OnlyRoot.FId, 1);
  ASSERT_EQ(OnlyRoot.CallCount, 1);
  ASSERT_EQ(OnlyRoot.CumulativeLocalTime, 1u);
}
// An exit event that has no matching entry event: the test expects the trie to
// report no roots afterwards.
TEST(FunctionCallTrieTest, MissingFunctionEntry) {
  // Reset the profiler flags before creating the allocators, exactly as the
  // other allocator-backed tests in this file do, so that this test is set up
  // the same way whether it runs alone (e.g. under --gtest_filter) or after
  // those tests.
  // NOTE(review): presumably InitAllocators() reads these flags -- confirm.
  profilerFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  // Exit function 1 without ever having entered it.
  Trie.exitFunction(1, 1);

  const auto &R = Trie.getRoots();
  ASSERT_TRUE(R.empty());
}
// An entry event that is never followed by its exit event: the test expects
// the trie to report no roots.
TEST(FunctionCallTrieTest, MissingFunctionExit) {
  // Reset the profiler flags before creating the allocators, exactly as the
  // other allocator-backed tests in this file do, so that this test is set up
  // the same way whether it runs alone (e.g. under --gtest_filter) or after
  // those tests.
  // NOTE(review): presumably InitAllocators() reads these flags -- confirm.
  profilerFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  // Enter function 1 and never exit it.
  Trie.enterFunction(1, 1);

  const auto &R = Trie.getRoots();
  ASSERT_TRUE(R.empty());
}
// Two top-level calls made one after the other (neither nested in the other)
// must each get their own root node with their own statistics.
TEST(FunctionCallTrieTest, MultipleRoots) {
  profilerFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  // Enter and exit FId = 1.
  Trie.enterFunction(1, 1);
  Trie.exitFunction(1, 2);

  // Enter and exit FId = 2.
  Trie.enterFunction(2, 3);
  Trie.exitFunction(2, 4);

  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  ASSERT_EQ(R.size(), 2u);

  // Validate both root pointers before anything dereferences them, so that a
  // null root shows up as a test failure instead of a crash.
  const auto R0 = R[0];
  const auto R1 = R[1];
  ASSERT_NE(R0, nullptr);
  ASSERT_NE(R1, nullptr);

  // Make sure the roots have different IDs.
  ASSERT_NE(R0->FId, R1->FId);

  // Inspect the roots that they have the right data: one call each, with a
  // local time of 1 (2 - 1 and 4 - 3 respectively).
  EXPECT_EQ(R0->CallCount, 1u);
  EXPECT_EQ(R0->CumulativeLocalTime, 1u);
  EXPECT_EQ(R1->CallCount, 1u);
  EXPECT_EQ(R1->CumulativeLocalTime, 1u);
}
// While missing an intermediary exit may be rare in practice, we still enforce
// that we can handle the case where we've missed the exit event somehow, in
// between call entry/exits. To illustrate, imagine the following shadow call
// stack:
//
// f0@t0 -> f1@t1 -> f2@t2
//
// If for whatever reason we see an exit for `f2` @ t3, followed by an exit for
// `f0` @ t4 (i.e. no `f1` exit in between) then we need to handle the case of
// accounting local time to `f2` from d = (t3 - t2) when `f2` exits, and then,
// when `f0` exits, local time to `f1` as d' = (t4 - t1) and local time to `f0`
// as d'' = (t4 - t0) - d'. These are the values the test below expects.
TEST(FunctionCallTrieTest, MissingIntermediaryExit) {
  profilerFlags()->setDefaults();
  auto Allocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocators);

  // Nest three calls (1 -> 2 -> 3), then report exits for 3 and for 1 only;
  // no exit is ever reported for 2.
  Trie.enterFunction(1, 0);
  Trie.enterFunction(2, 100);
  Trie.enterFunction(3, 200);
  Trie.exitFunction(3, 300);
  Trie.exitFunction(1, 400);

  // The trie should now hold every function along one path, in the order
  // 1 -> 2 -> 3, with the appropriate counts and local latencies.
  const auto &Roots = Trie.getRoots();
  ASSERT_FALSE(Roots.empty());
  ASSERT_EQ(Roots.size(), 1u);

  const auto &Outer = *Roots[0];
  ASSERT_EQ(Outer.FId, 1);
  ASSERT_FALSE(Outer.Callees.empty());

  const auto &Middle = *Outer.Callees[0].NodePtr;
  ASSERT_EQ(Middle.FId, 2);
  ASSERT_FALSE(Middle.Callees.empty());

  const auto &Inner = *Middle.Callees[0].NodePtr;
  ASSERT_EQ(Inner.FId, 3);
  ASSERT_TRUE(Inner.Callees.empty());

  // With the shape of the trie established, check the per-node statistics.
  // Every function is counted as called once, including function 2, whose
  // exit was never reported.
  EXPECT_EQ(Inner.CallCount, 1);
  EXPECT_EQ(Middle.CallCount, 1);
  EXPECT_EQ(Outer.CallCount, 1);

  // Expected local times for the TSCs used above: 100 for function 3,
  // 300 for function 2 and 100 for function 1.
  EXPECT_EQ(Inner.CumulativeLocalTime, 100);
  EXPECT_EQ(Middle.CumulativeLocalTime, 300);
  EXPECT_EQ(Outer.CumulativeLocalTime, 100);
}
// TODO: Test that we can handle cross-CPU migrations, where TSCs are not
// guaranteed to be synchronised.
// deepCopyInto() must reproduce the trie in the destination, with the copied
// nodes pointing at their copied parents rather than at the original's nodes.
TEST(FunctionCallTrieTest, DeepCopy) {
  profilerFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  // Function 1 calls function 2 and then function 3, so the single root ends
  // up with two callees.
  Trie.enterFunction(1, 0);
  Trie.enterFunction(2, 1);
  Trie.exitFunction(2, 2);
  Trie.enterFunction(3, 3);
  Trie.exitFunction(3, 4);
  Trie.exitFunction(1, 5);

  // We want to make a deep copy and compare notes. The copy is backed by its
  // own, separately created set of allocators.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Copy(B);
  Trie.deepCopyInto(Copy);

  ASSERT_NE(Trie.getRoots().size(), 0u);
  ASSERT_EQ(Trie.getRoots().size(), Copy.getRoots().size());
  const auto &R0Orig = *Trie.getRoots()[0];
  const auto &R0Copy = *Copy.getRoots()[0];
  EXPECT_EQ(R0Orig.FId, 1);
  EXPECT_EQ(R0Orig.FId, R0Copy.FId);
  ASSERT_EQ(R0Orig.Callees.size(), 2u);
  ASSERT_EQ(R0Copy.Callees.size(), 2u);

  // Look up the callee entry for function 2 in both tries. The lookup results
  // are checked before use so that a missing entry is reported as a test
  // failure instead of crashing on a null dereference.
  // NOTE(review): assumes find_element() returns a raw pointer that is null
  // when nothing matches -- confirm against the Array implementation.
  const auto IsFunctionTwo = [](const FunctionCallTrie::NodeIdPair &R) {
    return R.FId == 2;
  };
  const auto *CalleeOrig = R0Orig.Callees.find_element(IsFunctionTwo);
  ASSERT_NE(CalleeOrig, nullptr);
  ASSERT_NE(CalleeOrig->NodePtr, nullptr);
  const auto *CalleeCopy = R0Copy.Callees.find_element(IsFunctionTwo);
  ASSERT_NE(CalleeCopy, nullptr);
  ASSERT_NE(CalleeCopy->NodePtr, nullptr);

  const auto &F1Orig = *CalleeOrig->NodePtr;
  const auto &F1Copy = *CalleeCopy->NodePtr;

  // Each callee must point back at the root of its own trie.
  EXPECT_EQ(&R0Orig, F1Orig.Parent);
  EXPECT_EQ(&R0Copy, F1Copy.Parent);
}
// Merging two tries that recorded the same call path into a third trie must
// yield that single path, with call counts and local times summed per node.
TEST(FunctionCallTrieTest, MergeInto) {
  profilerFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie T0(A);
  FunctionCallTrie T1(A);

  // 1 -> 2 -> 3
  T0.enterFunction(1, 0);
  T0.enterFunction(2, 1);
  T0.enterFunction(3, 2);
  T0.exitFunction(3, 3);
  T0.exitFunction(2, 4);
  T0.exitFunction(1, 5);

  // 1 -> 2 -> 3
  T1.enterFunction(1, 0);
  T1.enterFunction(2, 1);
  T1.enterFunction(3, 2);
  T1.exitFunction(3, 3);
  T1.exitFunction(2, 4);
  T1.exitFunction(1, 5);

  // We use a different allocator here to make sure that we're able to transfer
  // data into a FunctionCallTrie which uses a different allocator. This
  // reflects the intended usage scenario for when we're collecting profiles
  // that aggregate across threads.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Merged(B);

  T0.mergeInto(Merged);
  T1.mergeInto(Merged);

  ASSERT_EQ(Merged.getRoots().size(), 1u);
  const auto &R0 = *Merged.getRoots()[0];
  EXPECT_EQ(R0.FId, 1);
  EXPECT_EQ(R0.CallCount, 2);
  EXPECT_EQ(R0.CumulativeLocalTime, 10);
  // Fatal check: Callees[0] is accessed right below, so a wrong size must stop
  // the test here instead of continuing into an out-of-range access.
  ASSERT_EQ(R0.Callees.size(), 1u);

  const auto &F1 = *R0.Callees[0].NodePtr;
  EXPECT_EQ(F1.FId, 2);
  EXPECT_EQ(F1.CallCount, 2);
  EXPECT_EQ(F1.CumulativeLocalTime, 6);
  // Fatal for the same reason as above: Callees[0] is accessed next.
  ASSERT_EQ(F1.Callees.size(), 1u);

  const auto &F2 = *F1.Callees[0].NodePtr;
  EXPECT_EQ(F2.FId, 3);
  EXPECT_EQ(F2.CallCount, 2);
  EXPECT_EQ(F2.CumulativeLocalTime, 2);
  EXPECT_EQ(F2.Callees.size(), 0u);
}
} // namespace
} // namespace __xray