139 lines
5.5 KiB
C++
139 lines
5.5 KiB
C++
//===-- GCNPreRALongBranchReg.cpp ----------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
// \file
|
|
// \brief Pass to estimate pre RA branch size and reserve a pair of SGPRs if
|
|
// there is a long branch. Branch size at this point is difficult to track since
|
|
// we have no idea what spills will be inserted later on. We just assume 8 bytes
|
|
// per instruction to compute approximations without computing the actual
|
|
// instruction size to see if we're in the neighborhood of the maximum branch
|
|
// distance threshold. Tuning of what is considered "long" is handled through
|
|
// amdgpu-long-branch-factor cl argument which sets LongBranchFactor.
|
|
//===----------------------------------------------------------------------===//
|
|
#include "AMDGPU.h"
|
|
#include "GCNSubtarget.h"
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
#include "SIMachineFunctionInfo.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/InitializePasses.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "amdgpu-pre-ra-long-branch-reg"
|
|
|
|
namespace {
|
|
|
|
// Hidden command line knob (-amdgpu-long-branch-factor, default 1.0). The
// value is multiplied into the estimated branch distance before that distance
// is tested against the branch offset range.
static cl::opt<double> LongBranchFactor(
    "amdgpu-long-branch-factor", cl::init(1.0), cl::Hidden,
    cl::desc(
        "Factor to apply to what qualifies as a long branch to reserve a "
        "pair of scalar registers. "
        "If this value is 0 the long branch registers are never reserved. "
        "As this value grows the greater chance the branch distance will "
        "fall within the threshold and the registers will be marked to be "
        "reserved. "
        "We lean towards always reserving a register for long jumps"));
|
|
|
|
class GCNPreRALongBranchReg : public MachineFunctionPass {
|
|
|
|
struct BasicBlockInfo {
|
|
// Offset - Distance from the beginning of the function to the beginning
|
|
// of this basic block.
|
|
uint64_t Offset = 0;
|
|
// Size - Size of the basic block in bytes
|
|
uint64_t Size = 0;
|
|
};
|
|
void generateBlockInfo(MachineFunction &MF,
|
|
SmallVectorImpl<BasicBlockInfo> &BlockInfo);
|
|
|
|
public:
|
|
static char ID;
|
|
GCNPreRALongBranchReg() : MachineFunctionPass(ID) {
|
|
initializeGCNPreRALongBranchRegPass(*PassRegistry::getPassRegistry());
|
|
}
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
StringRef getPassName() const override {
|
|
return "AMDGPU Pre-RA Long Branch Reg";
|
|
}
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
AU.setPreservesAll();
|
|
MachineFunctionPass::getAnalysisUsage(AU);
|
|
}
|
|
};
|
|
} // End anonymous namespace.
|
|
char GCNPreRALongBranchReg::ID = 0;
|
|
|
|
INITIALIZE_PASS(GCNPreRALongBranchReg, DEBUG_TYPE,
|
|
"AMDGPU Pre-RA Long Branch Reg", false, false)
|
|
|
|
char &llvm::GCNPreRALongBranchRegID = GCNPreRALongBranchReg::ID;
|
|
/// Fill \p BlockInfo, indexed by basic block number, with an approximate byte
/// size and start offset for every block of \p MF.
///
/// Sizes are estimated as 8 bytes per non-meta instruction rather than by
/// querying real instruction sizes. Offsets accumulate in layout order, so the
/// first block in the function is placed at offset 0. If \p MF contains no
/// blocks, \p BlockInfo is only resized.
void GCNPreRALongBranchReg::generateBlockInfo(
    MachineFunction &MF, SmallVectorImpl<BasicBlockInfo> &BlockInfo) {
  BlockInfo.resize(MF.getNumBlockIDs());

  // Start offset of the block currently being visited, i.e. the summed size of
  // every block that precedes it in layout order.
  uint64_t Offset = 0;
  for (const MachineBasicBlock &MBB : MF) {
    // Count the non-debug, non-meta instructions. isMetaInstruction is a
    // superset of isDebugInstr, so one check covers both.
    uint64_t NumInstr = 0;
    for (const MachineInstr &MI : MBB)
      if (!MI.isMetaInstruction())
        ++NumInstr;

    BasicBlockInfo &Info = BlockInfo[MBB.getNumber()];
    // Approximate size as just 8 bytes per instruction.
    Info.Size = 8 * NumInstr;
    // Written explicitly for every block (including the first) so the result
    // does not depend on what the caller's vector held beforehand.
    Info.Offset = Offset;
    Offset += Info.Size;
  }
}
|
|
/// Reserve an SGPR pair for long branches if any unconditional branch in
/// \p MF is estimated to be out of range.
///
/// \returns true if a long-branch register was recorded in the machine
/// function info, false if no free SGPR pair exists or no branch qualified.
bool GCNPreRALongBranchReg::runOnMachineFunction(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *InstrInfo = ST.getInstrInfo();
  const SIRegisterInfo *RegInfo = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Ask for the highest available SGPR pair for now; the intent is that it is
  // shifted down to a lower unused pair after RA. When every register is in
  // use findUnusedRegister yields AMDGPU::NoRegister and there is nothing to
  // reserve.
  Register ReservedPair = RegInfo->findUnusedRegister(
      MRI, &AMDGPU::SGPR_64RegClass, MF, /*ReserveHighestRegister=*/true);
  if (!ReservedPair)
    return false;

  // Estimated size and start offset of each basic block.
  SmallVector<BasicBlockInfo, 16> BlockInfo;
  generateBlockInfo(MF, BlockInfo);

  for (const MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::const_iterator Term = MBB.getLastNonDebugInstr();
    const bool EndsInUncondBranch =
        Term != MBB.end() && Term->isUnconditionalBranch();
    if (EndsInUncondBranch) {
      MachineBasicBlock *Target = InstrInfo->getBranchDestBlock(*Term);

      // The distance estimate is the destination block's offset from the
      // start of the function, scaled by the tuning factor.
      const uint64_t TargetOffset = BlockInfo[Target->getNumber()].Offset;
      const uint64_t ScaledDistance =
          static_cast<uint64_t>(LongBranchFactor * TargetOffset);

      // Anything outside the encodable range is treated as a long branch, and
      // a single such branch is enough to reserve the registers.
      if (!InstrInfo->isBranchOffsetInRange(Term->getOpcode(),
                                            ScaledDistance)) {
        FuncInfo->setLongBranchReservedReg(ReservedPair);
        return true;
      }
    }
  }

  return false;
}
|