369 lines
13 KiB
C++
369 lines
13 KiB
C++
|
//===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===//
|
|||
|
//
|
|||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|||
|
// See https://llvm.org/LICENSE.txt for license information.
|
|||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|||
|
//
|
|||
|
//===----------------------------------------------------------------------===//
|
|||
|
//
|
|||
|
// Our algorithm works by first identifying a subset of nodes that must always
|
|||
|
// be instrumented. We call these nodes ambiguous because knowing the coverage
|
|||
|
// of all remaining nodes is not enough to infer their coverage status.
|
|||
|
//
|
|||
|
// In general a node v is ambiguous if there exists two entry-to-terminal paths
|
|||
|
// P_1 and P_2 such that:
|
|||
|
// 1. v not in P_1 but P_1 visits a predecessor of v, and
|
|||
|
// 2. v not in P_2 but P_2 visits a successor of v.
|
|||
|
//
|
|||
|
// If a node v is not ambiguous, then if condition 1 fails, we can infer v’s
|
|||
|
// coverage from the coverage of its predecessors, or if condition 2 fails, we
|
|||
|
// can infer v’s coverage from the coverage of its successors.
|
|||
|
//
|
|||
|
// Sadly, there are example CFGs where it is not possible to infer all nodes
|
|||
|
// from the ambiguous nodes alone. Our algorithm selects a minimum number of
|
|||
|
// extra nodes to add to the ambiguous nodes to form a valid instrumentation S.
|
|||
|
//
|
|||
|
// Details on this algorithm can be found in https://arxiv.org/abs/2208.13907
|
|||
|
//
|
|||
|
//===----------------------------------------------------------------------===//
|
|||
|
|
|||
|
#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
|
|||
|
#include "llvm/ADT/DepthFirstIterator.h"
|
|||
|
#include "llvm/ADT/Statistic.h"
|
|||
|
#include "llvm/Support/CRC.h"
|
|||
|
#include "llvm/Support/Debug.h"
|
|||
|
#include "llvm/Support/GraphWriter.h"
|
|||
|
#include "llvm/Support/raw_ostream.h"
|
|||
|
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
|||
|
|
|||
|
using namespace llvm;
|
|||
|
|
|||
|
#define DEBUG_TYPE "pgo-block-coverage"
|
|||
|
|
|||
|
STATISTIC(NumFunctions, "Number of total functions that BCI has processed");
|
|||
|
STATISTIC(NumIneligibleFunctions,
|
|||
|
"Number of functions for which BCI cannot run on");
|
|||
|
STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed");
|
|||
|
STATISTIC(NumInstrumentedBlocks,
|
|||
|
"Number of basic blocks instrumented for coverage");
|
|||
|
|
|||
|
BlockCoverageInference::BlockCoverageInference(const Function &F,
|
|||
|
bool ForceInstrumentEntry)
|
|||
|
: F(F), ForceInstrumentEntry(ForceInstrumentEntry) {
|
|||
|
findDependencies();
|
|||
|
assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock()));
|
|||
|
|
|||
|
++NumFunctions;
|
|||
|
for (auto &BB : F) {
|
|||
|
++NumBlocks;
|
|||
|
if (shouldInstrumentBlock(BB))
|
|||
|
++NumInstrumentedBlocks;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
BlockCoverageInference::BlockSet
|
|||
|
BlockCoverageInference::getDependencies(const BasicBlock &BB) const {
|
|||
|
assert(BB.getParent() == &F);
|
|||
|
BlockSet Dependencies;
|
|||
|
auto It = PredecessorDependencies.find(&BB);
|
|||
|
if (It != PredecessorDependencies.end())
|
|||
|
Dependencies.set_union(It->second);
|
|||
|
It = SuccessorDependencies.find(&BB);
|
|||
|
if (It != SuccessorDependencies.end())
|
|||
|
Dependencies.set_union(It->second);
|
|||
|
return Dependencies;
|
|||
|
}
|
|||
|
|
|||
|
uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const {
|
|||
|
JamCRC JC;
|
|||
|
uint64_t Index = 0;
|
|||
|
for (auto &BB : F) {
|
|||
|
if (shouldInstrumentBlock(BB)) {
|
|||
|
uint8_t Data[8];
|
|||
|
support::endian::write64le(Data, Index);
|
|||
|
JC.update(Data);
|
|||
|
}
|
|||
|
Index++;
|
|||
|
}
|
|||
|
return JC.getCRC();
|
|||
|
}
|
|||
|
|
|||
|
bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const {
|
|||
|
assert(BB.getParent() == &F);
|
|||
|
auto It = PredecessorDependencies.find(&BB);
|
|||
|
if (It != PredecessorDependencies.end() && It->second.size())
|
|||
|
return false;
|
|||
|
It = SuccessorDependencies.find(&BB);
|
|||
|
if (It != SuccessorDependencies.end() && It->second.size())
|
|||
|
return false;
|
|||
|
return true;
|
|||
|
}
|
|||
|
|
|||
|
void BlockCoverageInference::findDependencies() {
|
|||
|
assert(PredecessorDependencies.empty() && SuccessorDependencies.empty());
|
|||
|
// Empirical analysis shows that this algorithm finishes within 5 seconds for
|
|||
|
// functions with fewer than 1.5K blocks.
|
|||
|
if (F.hasFnAttribute(Attribute::NoReturn) || F.size() > 1500) {
|
|||
|
++NumIneligibleFunctions;
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
SmallVector<const BasicBlock *, 4> TerminalBlocks;
|
|||
|
for (auto &BB : F)
|
|||
|
if (succ_empty(&BB))
|
|||
|
TerminalBlocks.push_back(&BB);
|
|||
|
|
|||
|
// Traverse the CFG backwards from the terminal blocks to make sure every
|
|||
|
// block can reach some terminal block. Otherwise this algorithm will not work
|
|||
|
// and we must fall back to instrumenting every block.
|
|||
|
df_iterator_default_set<const BasicBlock *> Visited;
|
|||
|
for (auto *BB : TerminalBlocks)
|
|||
|
for (auto *N : inverse_depth_first_ext(BB, Visited))
|
|||
|
(void)N;
|
|||
|
if (F.size() != Visited.size()) {
|
|||
|
++NumIneligibleFunctions;
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
// The current implementation for computing `PredecessorDependencies` and
|
|||
|
// `SuccessorDependencies` runs in quadratic time with respect to the number
|
|||
|
// of basic blocks. While we do have a more complicated linear time algorithm
|
|||
|
// in https://arxiv.org/abs/2208.13907 we do not know if it will give a
|
|||
|
// significant speedup in practice given that most functions tend to be
|
|||
|
// relatively small in size for intended use cases.
|
|||
|
auto &EntryBlock = F.getEntryBlock();
|
|||
|
for (auto &BB : F) {
|
|||
|
// The set of blocks that are reachable while avoiding BB.
|
|||
|
BlockSet ReachableFromEntry, ReachableFromTerminal;
|
|||
|
getReachableAvoiding(EntryBlock, BB, /*IsForward=*/true,
|
|||
|
ReachableFromEntry);
|
|||
|
for (auto *TerminalBlock : TerminalBlocks)
|
|||
|
getReachableAvoiding(*TerminalBlock, BB, /*IsForward=*/false,
|
|||
|
ReachableFromTerminal);
|
|||
|
|
|||
|
auto Preds = predecessors(&BB);
|
|||
|
bool HasSuperReachablePred = llvm::any_of(Preds, [&](auto *Pred) {
|
|||
|
return ReachableFromEntry.count(Pred) &&
|
|||
|
ReachableFromTerminal.count(Pred);
|
|||
|
});
|
|||
|
if (!HasSuperReachablePred)
|
|||
|
for (auto *Pred : Preds)
|
|||
|
if (ReachableFromEntry.count(Pred))
|
|||
|
PredecessorDependencies[&BB].insert(Pred);
|
|||
|
|
|||
|
auto Succs = successors(&BB);
|
|||
|
bool HasSuperReachableSucc = llvm::any_of(Succs, [&](auto *Succ) {
|
|||
|
return ReachableFromEntry.count(Succ) &&
|
|||
|
ReachableFromTerminal.count(Succ);
|
|||
|
});
|
|||
|
if (!HasSuperReachableSucc)
|
|||
|
for (auto *Succ : Succs)
|
|||
|
if (ReachableFromTerminal.count(Succ))
|
|||
|
SuccessorDependencies[&BB].insert(Succ);
|
|||
|
}
|
|||
|
|
|||
|
if (ForceInstrumentEntry) {
|
|||
|
// Force the entry block to be instrumented by clearing the blocks it can
|
|||
|
// infer coverage from.
|
|||
|
PredecessorDependencies[&EntryBlock].clear();
|
|||
|
SuccessorDependencies[&EntryBlock].clear();
|
|||
|
}
|
|||
|
|
|||
|
// Construct a graph where blocks are connected if there is a mutual
|
|||
|
// dependency between them. This graph has a special property that it contains
|
|||
|
// only paths.
|
|||
|
DenseMap<const BasicBlock *, BlockSet> AdjacencyList;
|
|||
|
for (auto &BB : F) {
|
|||
|
for (auto *Succ : successors(&BB)) {
|
|||
|
if (SuccessorDependencies[&BB].count(Succ) &&
|
|||
|
PredecessorDependencies[Succ].count(&BB)) {
|
|||
|
AdjacencyList[&BB].insert(Succ);
|
|||
|
AdjacencyList[Succ].insert(&BB);
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// Given a path with at least one node, return the next node on the path.
|
|||
|
auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * {
|
|||
|
assert(Path.size());
|
|||
|
auto &Neighbors = AdjacencyList[Path.back()];
|
|||
|
if (Path.size() == 1) {
|
|||
|
// This is the first node on the path, return its neighbor.
|
|||
|
assert(Neighbors.size() == 1);
|
|||
|
return Neighbors.front();
|
|||
|
} else if (Neighbors.size() == 2) {
|
|||
|
// This is the middle of the path, find the neighbor that is not on the
|
|||
|
// path already.
|
|||
|
assert(Path.size() >= 2);
|
|||
|
return Path.count(Neighbors[0]) ? Neighbors[1] : Neighbors[0];
|
|||
|
}
|
|||
|
// This is the end of the path.
|
|||
|
assert(Neighbors.size() == 1);
|
|||
|
return nullptr;
|
|||
|
};
|
|||
|
|
|||
|
// Remove all cycles in the inferencing graph.
|
|||
|
for (auto &BB : F) {
|
|||
|
if (AdjacencyList[&BB].size() == 1) {
|
|||
|
// We found the head of some path.
|
|||
|
BlockSet Path;
|
|||
|
Path.insert(&BB);
|
|||
|
while (const BasicBlock *Next = getNextOnPath(Path))
|
|||
|
Path.insert(Next);
|
|||
|
LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n");
|
|||
|
|
|||
|
// Remove these nodes from the graph so we don't discover this path again.
|
|||
|
for (auto *BB : Path)
|
|||
|
AdjacencyList[BB].clear();
|
|||
|
|
|||
|
// Finally, remove the cycles.
|
|||
|
if (PredecessorDependencies[Path.front()].size()) {
|
|||
|
for (auto *BB : Path)
|
|||
|
if (BB != Path.back())
|
|||
|
SuccessorDependencies[BB].clear();
|
|||
|
} else {
|
|||
|
for (auto *BB : Path)
|
|||
|
if (BB != Path.front())
|
|||
|
PredecessorDependencies[BB].clear();
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
LLVM_DEBUG(dump(dbgs()));
|
|||
|
}
|
|||
|
|
|||
|
void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start,
|
|||
|
const BasicBlock &Avoid,
|
|||
|
bool IsForward,
|
|||
|
BlockSet &Reachable) const {
|
|||
|
df_iterator_default_set<const BasicBlock *> Visited;
|
|||
|
Visited.insert(&Avoid);
|
|||
|
if (IsForward) {
|
|||
|
auto Range = depth_first_ext(&Start, Visited);
|
|||
|
Reachable.insert(Range.begin(), Range.end());
|
|||
|
} else {
|
|||
|
auto Range = inverse_depth_first_ext(&Start, Visited);
|
|||
|
Reachable.insert(Range.begin(), Range.end());
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
namespace llvm {
|
|||
|
class DotFuncBCIInfo {
|
|||
|
private:
|
|||
|
const BlockCoverageInference *BCI;
|
|||
|
const DenseMap<const BasicBlock *, bool> *Coverage;
|
|||
|
|
|||
|
public:
|
|||
|
DotFuncBCIInfo(const BlockCoverageInference *BCI,
|
|||
|
const DenseMap<const BasicBlock *, bool> *Coverage)
|
|||
|
: BCI(BCI), Coverage(Coverage) {}
|
|||
|
|
|||
|
const Function &getFunction() { return BCI->F; }
|
|||
|
|
|||
|
bool isInstrumented(const BasicBlock *BB) const {
|
|||
|
return BCI->shouldInstrumentBlock(*BB);
|
|||
|
}
|
|||
|
|
|||
|
bool isCovered(const BasicBlock *BB) const {
|
|||
|
return Coverage && Coverage->lookup(BB);
|
|||
|
}
|
|||
|
|
|||
|
bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const {
|
|||
|
return BCI->getDependencies(*Src).count(Dest);
|
|||
|
}
|
|||
|
};
|
|||
|
|
|||
|
template <>
|
|||
|
struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> {
|
|||
|
static NodeRef getEntryNode(DotFuncBCIInfo *Info) {
|
|||
|
return &(Info->getFunction().getEntryBlock());
|
|||
|
}
|
|||
|
|
|||
|
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
|
|||
|
using nodes_iterator = pointer_iterator<Function::const_iterator>;
|
|||
|
|
|||
|
static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) {
|
|||
|
return nodes_iterator(Info->getFunction().begin());
|
|||
|
}
|
|||
|
|
|||
|
static nodes_iterator nodes_end(DotFuncBCIInfo *Info) {
|
|||
|
return nodes_iterator(Info->getFunction().end());
|
|||
|
}
|
|||
|
|
|||
|
static size_t size(DotFuncBCIInfo *Info) {
|
|||
|
return Info->getFunction().size();
|
|||
|
}
|
|||
|
};
|
|||
|
|
|||
|
template <>
|
|||
|
struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits {
|
|||
|
|
|||
|
DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
|
|||
|
|
|||
|
static std::string getGraphName(DotFuncBCIInfo *Info) {
|
|||
|
return "BCI CFG for " + Info->getFunction().getName().str();
|
|||
|
}
|
|||
|
|
|||
|
std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) {
|
|||
|
return Node->getName().str();
|
|||
|
}
|
|||
|
|
|||
|
std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I,
|
|||
|
DotFuncBCIInfo *Info) {
|
|||
|
const BasicBlock *Dest = *I;
|
|||
|
if (Info->isDependent(Src, Dest))
|
|||
|
return "color=red";
|
|||
|
if (Info->isDependent(Dest, Src))
|
|||
|
return "color=blue";
|
|||
|
return "";
|
|||
|
}
|
|||
|
|
|||
|
std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) {
|
|||
|
std::string Result;
|
|||
|
if (Info->isInstrumented(Node))
|
|||
|
Result += "style=filled,fillcolor=gray";
|
|||
|
if (Info->isCovered(Node))
|
|||
|
Result += std::string(Result.empty() ? "" : ",") + "color=red";
|
|||
|
return Result;
|
|||
|
}
|
|||
|
};
|
|||
|
|
|||
|
} // namespace llvm
|
|||
|
|
|||
|
void BlockCoverageInference::viewBlockCoverageGraph(
|
|||
|
const DenseMap<const BasicBlock *, bool> *Coverage) const {
|
|||
|
DotFuncBCIInfo Info(this, Coverage);
|
|||
|
WriteGraph(&Info, "BCI", false,
|
|||
|
"Block Coverage Inference for " + F.getName());
|
|||
|
}
|
|||
|
|
|||
|
void BlockCoverageInference::dump(raw_ostream &OS) const {
|
|||
|
OS << "Minimal block coverage for function \'" << F.getName()
|
|||
|
<< "\' (Instrumented=*)\n";
|
|||
|
for (auto &BB : F) {
|
|||
|
OS << (shouldInstrumentBlock(BB) ? "* " : " ") << BB.getName() << "\n";
|
|||
|
auto It = PredecessorDependencies.find(&BB);
|
|||
|
if (It != PredecessorDependencies.end() && It->second.size())
|
|||
|
OS << " PredDeps = " << getBlockNames(It->second) << "\n";
|
|||
|
It = SuccessorDependencies.find(&BB);
|
|||
|
if (It != SuccessorDependencies.end() && It->second.size())
|
|||
|
OS << " SuccDeps = " << getBlockNames(It->second) << "\n";
|
|||
|
}
|
|||
|
OS << " Instrumented Blocks Hash = 0x"
|
|||
|
<< Twine::utohexstr(getInstrumentedBlocksHash()) << "\n";
|
|||
|
}
|
|||
|
|
|||
|
std::string
|
|||
|
BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) {
|
|||
|
std::string Result;
|
|||
|
raw_string_ostream OS(Result);
|
|||
|
OS << "[";
|
|||
|
if (!BBs.empty()) {
|
|||
|
OS << BBs.front()->getName();
|
|||
|
BBs = BBs.drop_front();
|
|||
|
}
|
|||
|
for (auto *BB : BBs)
|
|||
|
OS << ", " << BB->getName();
|
|||
|
OS << "]";
|
|||
|
return OS.str();
|
|||
|
}
|