156 lines
5.1 KiB
C++
156 lines
5.1 KiB
C++
//===-- NVPTXLowerUnreachable.cpp - Lower unreachables to exit =====--===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// PTX does not have a notion of `unreachable`, which results in emitted basic
|
|
// blocks having an edge to the next block:
|
|
//
|
|
// block1:
|
|
// call @does_not_return();
|
|
// // unreachable
|
|
// block2:
|
|
// // ptxas will create a CFG edge from block1 to block2
|
|
//
|
|
// This may result in significant changes to the control flow graph, e.g., when
|
|
// LLVM moves unreachable blocks to the end of the function. That's a problem
|
|
// in the context of divergent control flow, as `ptxas` uses the CFG to
|
|
// determine divergent regions, and some intructions may not be executed
|
|
// divergently.
|
|
//
|
|
// For example, `bar.sync` is not allowed to be executed divergently on Pascal
|
|
// or earlier. If we start with the following:
|
|
//
|
|
// entry:
|
|
// // start of divergent region
|
|
// @%p0 bra cont;
|
|
// @%p1 bra unlikely;
|
|
// ...
|
|
// bra.uni cont;
|
|
// unlikely:
|
|
// ...
|
|
// // unreachable
|
|
// cont:
|
|
// // end of divergent region
|
|
// bar.sync 0;
|
|
// bra.uni exit;
|
|
// exit:
|
|
// ret;
|
|
//
|
|
// it is transformed by the branch-folder and block-placement passes to:
|
|
//
|
|
// entry:
|
|
// // start of divergent region
|
|
// @%p0 bra cont;
|
|
// @%p1 bra unlikely;
|
|
// ...
|
|
// bra.uni cont;
|
|
// cont:
|
|
// bar.sync 0;
|
|
// bra.uni exit;
|
|
// unlikely:
|
|
// ...
|
|
// // unreachable
|
|
// exit:
|
|
// // end of divergent region
|
|
// ret;
|
|
//
|
|
// After moving the `unlikely` block to the end of the function, it has an edge
|
|
// to the `exit` block, which widens the divergent region and makes the
|
|
// `bar.sync` instruction happen divergently.
|
|
//
|
|
// To work around this, we add an `exit` instruction before every `unreachable`,
|
|
// as `ptxas` understands that exit terminates the CFG. We do only do this if
|
|
// `unreachable` is not lowered to `trap`, which has the same effect (although
|
|
// with current versions of `ptxas` only because it is emited as `trap; exit;`).
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "NVPTX.h"
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/IR/InlineAsm.h"
|
|
#include "llvm/IR/Instructions.h"
|
|
#include "llvm/IR/Type.h"
|
|
#include "llvm/Pass.h"
|
|
|
|
using namespace llvm;
|
|
|
|
namespace llvm {
|
|
void initializeNVPTXLowerUnreachablePass(PassRegistry &);
|
|
}
|
|
|
|
namespace {
|
|
class NVPTXLowerUnreachable : public FunctionPass {
|
|
StringRef getPassName() const override;
|
|
bool runOnFunction(Function &F) override;
|
|
bool isLoweredToTrap(const UnreachableInst &I) const;
|
|
|
|
public:
|
|
static char ID; // Pass identification, replacement for typeid
|
|
NVPTXLowerUnreachable(bool TrapUnreachable, bool NoTrapAfterNoreturn)
|
|
: FunctionPass(ID), TrapUnreachable(TrapUnreachable),
|
|
NoTrapAfterNoreturn(NoTrapAfterNoreturn) {}
|
|
|
|
private:
|
|
bool TrapUnreachable;
|
|
bool NoTrapAfterNoreturn;
|
|
};
|
|
} // namespace
|
|
|
|
char NVPTXLowerUnreachable::ID = 1;
|
|
|
|
INITIALIZE_PASS(NVPTXLowerUnreachable, "nvptx-lower-unreachable",
|
|
"Lower Unreachable", false, false)
|
|
|
|
StringRef NVPTXLowerUnreachable::getPassName() const {
|
|
return "add an exit instruction before every unreachable";
|
|
}
|
|
|
|
// =============================================================================
|
|
// Returns whether a `trap` intrinsic should be emitted before I.
|
|
//
|
|
// This is a copy of the logic in SelectionDAGBuilder::visitUnreachable().
|
|
// =============================================================================
|
|
bool NVPTXLowerUnreachable::isLoweredToTrap(const UnreachableInst &I) const {
|
|
if (!TrapUnreachable)
|
|
return false;
|
|
if (!NoTrapAfterNoreturn)
|
|
return true;
|
|
const CallInst *Call = dyn_cast_or_null<CallInst>(I.getPrevNode());
|
|
return Call && Call->doesNotReturn();
|
|
}
|
|
|
|
// =============================================================================
|
|
// Main function for this pass.
|
|
// =============================================================================
|
|
bool NVPTXLowerUnreachable::runOnFunction(Function &F) {
|
|
if (skipFunction(F))
|
|
return false;
|
|
// Early out iff isLoweredToTrap() always returns true.
|
|
if (TrapUnreachable && !NoTrapAfterNoreturn)
|
|
return false;
|
|
|
|
LLVMContext &C = F.getContext();
|
|
FunctionType *ExitFTy = FunctionType::get(Type::getVoidTy(C), false);
|
|
InlineAsm *Exit = InlineAsm::get(ExitFTy, "exit;", "", true);
|
|
|
|
bool Changed = false;
|
|
for (auto &BB : F)
|
|
for (auto &I : BB) {
|
|
if (auto unreachableInst = dyn_cast<UnreachableInst>(&I)) {
|
|
if (isLoweredToTrap(*unreachableInst))
|
|
continue; // trap is emitted as `trap; exit;`.
|
|
CallInst::Create(ExitFTy, Exit, "", unreachableInst);
|
|
Changed = true;
|
|
}
|
|
}
|
|
return Changed;
|
|
}
|
|
|
|
FunctionPass *llvm::createNVPTXLowerUnreachablePass(bool TrapUnreachable,
|
|
bool NoTrapAfterNoreturn) {
|
|
return new NVPTXLowerUnreachable(TrapUnreachable, NoTrapAfterNoreturn);
|
|
}
|