368 lines
13 KiB
C++
368 lines
13 KiB
C++
//===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===//
|
||
//
|
||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||
// See https://llvm.org/LICENSE.txt for license information.
|
||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||
//
|
||
//===----------------------------------------------------------------------===//
|
||
//
|
||
// Our algorithm works by first identifying a subset of nodes that must always
|
||
// be instrumented. We call these nodes ambiguous because knowing the coverage
|
||
// of all remaining nodes is not enough to infer their coverage status.
|
||
//
|
||
// In general a node v is ambiguous if there exists two entry-to-terminal paths
|
||
// P_1 and P_2 such that:
|
||
// 1. v not in P_1 but P_1 visits a predecessor of v, and
|
||
// 2. v not in P_2 but P_2 visits a successor of v.
|
||
//
|
||
// If a node v is not ambiguous, then if condition 1 fails, we can infer v’s
|
||
// coverage from the coverage of its predecessors, or if condition 2 fails, we
|
||
// can infer v’s coverage from the coverage of its successors.
|
||
//
|
||
// Sadly, there are example CFGs where it is not possible to infer all nodes
|
||
// from the ambiguous nodes alone. Our algorithm selects a minimum number of
|
||
// extra nodes to add to the ambiguous nodes to form a valid instrumentation S.
|
||
//
|
||
// Details on this algorithm can be found in https://arxiv.org/abs/2208.13907
|
||
//
|
||
//===----------------------------------------------------------------------===//
|
||
|
||
#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
|
||
#include "llvm/ADT/DepthFirstIterator.h"
|
||
#include "llvm/ADT/Statistic.h"
|
||
#include "llvm/Support/CRC.h"
|
||
#include "llvm/Support/Debug.h"
|
||
#include "llvm/Support/GraphWriter.h"
|
||
#include "llvm/Support/raw_ostream.h"
|
||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||
|
||
using namespace llvm;
|
||
|
||
#define DEBUG_TYPE "pgo-block-coverage"
|
||
|
||
STATISTIC(NumFunctions, "Number of total functions that BCI has processed");
|
||
STATISTIC(NumIneligibleFunctions,
|
||
"Number of functions for which BCI cannot run on");
|
||
STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed");
|
||
STATISTIC(NumInstrumentedBlocks,
|
||
"Number of basic blocks instrumented for coverage");
|
||
|
||
BlockCoverageInference::BlockCoverageInference(const Function &F,
|
||
bool ForceInstrumentEntry)
|
||
: F(F), ForceInstrumentEntry(ForceInstrumentEntry) {
|
||
findDependencies();
|
||
assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock()));
|
||
|
||
++NumFunctions;
|
||
for (auto &BB : F) {
|
||
++NumBlocks;
|
||
if (shouldInstrumentBlock(BB))
|
||
++NumInstrumentedBlocks;
|
||
}
|
||
}
|
||
|
||
BlockCoverageInference::BlockSet
|
||
BlockCoverageInference::getDependencies(const BasicBlock &BB) const {
|
||
assert(BB.getParent() == &F);
|
||
BlockSet Dependencies;
|
||
auto It = PredecessorDependencies.find(&BB);
|
||
if (It != PredecessorDependencies.end())
|
||
Dependencies.set_union(It->second);
|
||
It = SuccessorDependencies.find(&BB);
|
||
if (It != SuccessorDependencies.end())
|
||
Dependencies.set_union(It->second);
|
||
return Dependencies;
|
||
}
|
||
|
||
uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const {
|
||
JamCRC JC;
|
||
uint64_t Index = 0;
|
||
for (auto &BB : F) {
|
||
if (shouldInstrumentBlock(BB)) {
|
||
uint8_t Data[8];
|
||
support::endian::write64le(Data, Index);
|
||
JC.update(Data);
|
||
}
|
||
Index++;
|
||
}
|
||
return JC.getCRC();
|
||
}
|
||
|
||
bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const {
|
||
assert(BB.getParent() == &F);
|
||
auto It = PredecessorDependencies.find(&BB);
|
||
if (It != PredecessorDependencies.end() && It->second.size())
|
||
return false;
|
||
It = SuccessorDependencies.find(&BB);
|
||
if (It != SuccessorDependencies.end() && It->second.size())
|
||
return false;
|
||
return true;
|
||
}
|
||
|
||
void BlockCoverageInference::findDependencies() {
|
||
assert(PredecessorDependencies.empty() && SuccessorDependencies.empty());
|
||
// Empirical analysis shows that this algorithm finishes within 5 seconds for
|
||
// functions with fewer than 1.5K blocks.
|
||
if (F.hasFnAttribute(Attribute::NoReturn) || F.size() > 1500) {
|
||
++NumIneligibleFunctions;
|
||
return;
|
||
}
|
||
|
||
SmallVector<const BasicBlock *, 4> TerminalBlocks;
|
||
for (auto &BB : F)
|
||
if (succ_empty(&BB))
|
||
TerminalBlocks.push_back(&BB);
|
||
|
||
// Traverse the CFG backwards from the terminal blocks to make sure every
|
||
// block can reach some terminal block. Otherwise this algorithm will not work
|
||
// and we must fall back to instrumenting every block.
|
||
df_iterator_default_set<const BasicBlock *> Visited;
|
||
for (auto *BB : TerminalBlocks)
|
||
for (auto *N : inverse_depth_first_ext(BB, Visited))
|
||
(void)N;
|
||
if (F.size() != Visited.size()) {
|
||
++NumIneligibleFunctions;
|
||
return;
|
||
}
|
||
|
||
// The current implementation for computing `PredecessorDependencies` and
|
||
// `SuccessorDependencies` runs in quadratic time with respect to the number
|
||
// of basic blocks. While we do have a more complicated linear time algorithm
|
||
// in https://arxiv.org/abs/2208.13907 we do not know if it will give a
|
||
// significant speedup in practice given that most functions tend to be
|
||
// relatively small in size for intended use cases.
|
||
auto &EntryBlock = F.getEntryBlock();
|
||
for (auto &BB : F) {
|
||
// The set of blocks that are reachable while avoiding BB.
|
||
BlockSet ReachableFromEntry, ReachableFromTerminal;
|
||
getReachableAvoiding(EntryBlock, BB, /*IsForward=*/true,
|
||
ReachableFromEntry);
|
||
for (auto *TerminalBlock : TerminalBlocks)
|
||
getReachableAvoiding(*TerminalBlock, BB, /*IsForward=*/false,
|
||
ReachableFromTerminal);
|
||
|
||
auto Preds = predecessors(&BB);
|
||
bool HasSuperReachablePred = llvm::any_of(Preds, [&](auto *Pred) {
|
||
return ReachableFromEntry.count(Pred) &&
|
||
ReachableFromTerminal.count(Pred);
|
||
});
|
||
if (!HasSuperReachablePred)
|
||
for (auto *Pred : Preds)
|
||
if (ReachableFromEntry.count(Pred))
|
||
PredecessorDependencies[&BB].insert(Pred);
|
||
|
||
auto Succs = successors(&BB);
|
||
bool HasSuperReachableSucc = llvm::any_of(Succs, [&](auto *Succ) {
|
||
return ReachableFromEntry.count(Succ) &&
|
||
ReachableFromTerminal.count(Succ);
|
||
});
|
||
if (!HasSuperReachableSucc)
|
||
for (auto *Succ : Succs)
|
||
if (ReachableFromTerminal.count(Succ))
|
||
SuccessorDependencies[&BB].insert(Succ);
|
||
}
|
||
|
||
if (ForceInstrumentEntry) {
|
||
// Force the entry block to be instrumented by clearing the blocks it can
|
||
// infer coverage from.
|
||
PredecessorDependencies[&EntryBlock].clear();
|
||
SuccessorDependencies[&EntryBlock].clear();
|
||
}
|
||
|
||
// Construct a graph where blocks are connected if there is a mutual
|
||
// dependency between them. This graph has a special property that it contains
|
||
// only paths.
|
||
DenseMap<const BasicBlock *, BlockSet> AdjacencyList;
|
||
for (auto &BB : F) {
|
||
for (auto *Succ : successors(&BB)) {
|
||
if (SuccessorDependencies[&BB].count(Succ) &&
|
||
PredecessorDependencies[Succ].count(&BB)) {
|
||
AdjacencyList[&BB].insert(Succ);
|
||
AdjacencyList[Succ].insert(&BB);
|
||
}
|
||
}
|
||
}
|
||
|
||
// Given a path with at least one node, return the next node on the path.
|
||
auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * {
|
||
assert(Path.size());
|
||
auto &Neighbors = AdjacencyList[Path.back()];
|
||
if (Path.size() == 1) {
|
||
// This is the first node on the path, return its neighbor.
|
||
assert(Neighbors.size() == 1);
|
||
return Neighbors.front();
|
||
} else if (Neighbors.size() == 2) {
|
||
// This is the middle of the path, find the neighbor that is not on the
|
||
// path already.
|
||
assert(Path.size() >= 2);
|
||
return Path.count(Neighbors[0]) ? Neighbors[1] : Neighbors[0];
|
||
}
|
||
// This is the end of the path.
|
||
assert(Neighbors.size() == 1);
|
||
return nullptr;
|
||
};
|
||
|
||
// Remove all cycles in the inferencing graph.
|
||
for (auto &BB : F) {
|
||
if (AdjacencyList[&BB].size() == 1) {
|
||
// We found the head of some path.
|
||
BlockSet Path;
|
||
Path.insert(&BB);
|
||
while (const BasicBlock *Next = getNextOnPath(Path))
|
||
Path.insert(Next);
|
||
LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n");
|
||
|
||
// Remove these nodes from the graph so we don't discover this path again.
|
||
for (auto *BB : Path)
|
||
AdjacencyList[BB].clear();
|
||
|
||
// Finally, remove the cycles.
|
||
if (PredecessorDependencies[Path.front()].size()) {
|
||
for (auto *BB : Path)
|
||
if (BB != Path.back())
|
||
SuccessorDependencies[BB].clear();
|
||
} else {
|
||
for (auto *BB : Path)
|
||
if (BB != Path.front())
|
||
PredecessorDependencies[BB].clear();
|
||
}
|
||
}
|
||
}
|
||
LLVM_DEBUG(dump(dbgs()));
|
||
}
|
||
|
||
void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start,
|
||
const BasicBlock &Avoid,
|
||
bool IsForward,
|
||
BlockSet &Reachable) const {
|
||
df_iterator_default_set<const BasicBlock *> Visited;
|
||
Visited.insert(&Avoid);
|
||
if (IsForward) {
|
||
auto Range = depth_first_ext(&Start, Visited);
|
||
Reachable.insert(Range.begin(), Range.end());
|
||
} else {
|
||
auto Range = inverse_depth_first_ext(&Start, Visited);
|
||
Reachable.insert(Range.begin(), Range.end());
|
||
}
|
||
}
|
||
|
||
namespace llvm {
|
||
class DotFuncBCIInfo {
|
||
private:
|
||
const BlockCoverageInference *BCI;
|
||
const DenseMap<const BasicBlock *, bool> *Coverage;
|
||
|
||
public:
|
||
DotFuncBCIInfo(const BlockCoverageInference *BCI,
|
||
const DenseMap<const BasicBlock *, bool> *Coverage)
|
||
: BCI(BCI), Coverage(Coverage) {}
|
||
|
||
const Function &getFunction() { return BCI->F; }
|
||
|
||
bool isInstrumented(const BasicBlock *BB) const {
|
||
return BCI->shouldInstrumentBlock(*BB);
|
||
}
|
||
|
||
bool isCovered(const BasicBlock *BB) const {
|
||
return Coverage && Coverage->lookup(BB);
|
||
}
|
||
|
||
bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const {
|
||
return BCI->getDependencies(*Src).count(Dest);
|
||
}
|
||
};
|
||
|
||
template <>
|
||
struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> {
|
||
static NodeRef getEntryNode(DotFuncBCIInfo *Info) {
|
||
return &(Info->getFunction().getEntryBlock());
|
||
}
|
||
|
||
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
|
||
using nodes_iterator = pointer_iterator<Function::const_iterator>;
|
||
|
||
static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) {
|
||
return nodes_iterator(Info->getFunction().begin());
|
||
}
|
||
|
||
static nodes_iterator nodes_end(DotFuncBCIInfo *Info) {
|
||
return nodes_iterator(Info->getFunction().end());
|
||
}
|
||
|
||
static size_t size(DotFuncBCIInfo *Info) {
|
||
return Info->getFunction().size();
|
||
}
|
||
};
|
||
|
||
template <>
|
||
struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits {
|
||
|
||
DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
|
||
|
||
static std::string getGraphName(DotFuncBCIInfo *Info) {
|
||
return "BCI CFG for " + Info->getFunction().getName().str();
|
||
}
|
||
|
||
std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) {
|
||
return Node->getName().str();
|
||
}
|
||
|
||
std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I,
|
||
DotFuncBCIInfo *Info) {
|
||
const BasicBlock *Dest = *I;
|
||
if (Info->isDependent(Src, Dest))
|
||
return "color=red";
|
||
if (Info->isDependent(Dest, Src))
|
||
return "color=blue";
|
||
return "";
|
||
}
|
||
|
||
std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) {
|
||
std::string Result;
|
||
if (Info->isInstrumented(Node))
|
||
Result += "style=filled,fillcolor=gray";
|
||
if (Info->isCovered(Node))
|
||
Result += std::string(Result.empty() ? "" : ",") + "color=red";
|
||
return Result;
|
||
}
|
||
};
|
||
|
||
} // namespace llvm
|
||
|
||
void BlockCoverageInference::viewBlockCoverageGraph(
|
||
const DenseMap<const BasicBlock *, bool> *Coverage) const {
|
||
DotFuncBCIInfo Info(this, Coverage);
|
||
WriteGraph(&Info, "BCI", false,
|
||
"Block Coverage Inference for " + F.getName());
|
||
}
|
||
|
||
void BlockCoverageInference::dump(raw_ostream &OS) const {
|
||
OS << "Minimal block coverage for function \'" << F.getName()
|
||
<< "\' (Instrumented=*)\n";
|
||
for (auto &BB : F) {
|
||
OS << (shouldInstrumentBlock(BB) ? "* " : " ") << BB.getName() << "\n";
|
||
auto It = PredecessorDependencies.find(&BB);
|
||
if (It != PredecessorDependencies.end() && It->second.size())
|
||
OS << " PredDeps = " << getBlockNames(It->second) << "\n";
|
||
It = SuccessorDependencies.find(&BB);
|
||
if (It != SuccessorDependencies.end() && It->second.size())
|
||
OS << " SuccDeps = " << getBlockNames(It->second) << "\n";
|
||
}
|
||
OS << " Instrumented Blocks Hash = 0x"
|
||
<< Twine::utohexstr(getInstrumentedBlocksHash()) << "\n";
|
||
}
|
||
|
||
std::string
|
||
BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) {
|
||
std::string Result;
|
||
raw_string_ostream OS(Result);
|
||
OS << "[";
|
||
if (!BBs.empty()) {
|
||
OS << BBs.front()->getName();
|
||
BBs = BBs.drop_front();
|
||
}
|
||
for (auto *BB : BBs)
|
||
OS << ", " << BB->getName();
|
||
OS << "]";
|
||
return OS.str();
|
||
}
|