260 lines
8.7 KiB
C++
260 lines
8.7 KiB
C++
//=== AArch64PostSelectOptimize.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does post-instruction-selection optimizations in the GlobalISel
// pipeline, before the rest of codegen runs.
//
//===----------------------------------------------------------------------===//
|
|
|
|
#include "AArch64.h"
|
|
#include "AArch64TargetMachine.h"
|
|
#include "MCTargetDesc/AArch64MCTargetDesc.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/CodeGen/GlobalISel/Utils.h"
|
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
|
#include "llvm/CodeGen/MachineOperand.h"
|
|
#include "llvm/CodeGen/TargetPassConfig.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
#define DEBUG_TYPE "aarch64-post-select-optimize"
|
|
|
|
using namespace llvm;
|
|
|
|
namespace {
|
|
class AArch64PostSelectOptimize : public MachineFunctionPass {
|
|
public:
|
|
static char ID;
|
|
|
|
AArch64PostSelectOptimize();
|
|
|
|
StringRef getPassName() const override {
|
|
return "AArch64 Post Select Optimizer";
|
|
}
|
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
|
|
|
private:
|
|
bool optimizeNZCVDefs(MachineBasicBlock &MBB);
|
|
bool doPeepholeOpts(MachineBasicBlock &MBB);
|
|
/// Look for cross regclass copies that can be trivially eliminated.
|
|
bool foldSimpleCrossClassCopies(MachineInstr &MI);
|
|
};
|
|
} // end anonymous namespace
|
|
|
|
/// Record the analysis requirements of this pass in \p AU.
void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
  // The target pass configuration must be available.
  AU.addRequired<TargetPassConfig>();

  // This pass declares that it leaves the CFG intact.
  AU.setPreservesCFG();

  // NOTE(review): presumably registers what the SelectionDAG fallback path
  // needs; confirm against llvm/CodeGen/GlobalISel/Utils.h.
  getSelectionDAGFallbackAnalysisUsage(AU);

  // Let the base class add its own requirements last.
  MachineFunctionPass::getAnalysisUsage(AU);
}
|
|
|
|
/// Construct the pass and run its initializer against the global pass
/// registry.
AArch64PostSelectOptimize::AArch64PostSelectOptimize()
    : MachineFunctionPass(ID) {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeAArch64PostSelectOptimizePass(Registry);
}
|
|
|
|
unsigned getNonFlagSettingVariant(unsigned Opc) {
|
|
switch (Opc) {
|
|
default:
|
|
return 0;
|
|
case AArch64::SUBSXrr:
|
|
return AArch64::SUBXrr;
|
|
case AArch64::SUBSWrr:
|
|
return AArch64::SUBWrr;
|
|
case AArch64::SUBSXrs:
|
|
return AArch64::SUBXrs;
|
|
case AArch64::SUBSWrs:
|
|
return AArch64::SUBWrs;
|
|
case AArch64::SUBSXri:
|
|
return AArch64::SUBXri;
|
|
case AArch64::SUBSWri:
|
|
return AArch64::SUBWri;
|
|
case AArch64::ADDSXrr:
|
|
return AArch64::ADDXrr;
|
|
case AArch64::ADDSWrr:
|
|
return AArch64::ADDWrr;
|
|
case AArch64::ADDSXrs:
|
|
return AArch64::ADDXrs;
|
|
case AArch64::ADDSWrs:
|
|
return AArch64::ADDWrs;
|
|
case AArch64::ADDSXri:
|
|
return AArch64::ADDXri;
|
|
case AArch64::ADDSWri:
|
|
return AArch64::ADDWri;
|
|
case AArch64::SBCSXr:
|
|
return AArch64::SBCXr;
|
|
case AArch64::SBCSWr:
|
|
return AArch64::SBCWr;
|
|
case AArch64::ADCSXr:
|
|
return AArch64::ADCXr;
|
|
case AArch64::ADCSWr:
|
|
return AArch64::ADCWr;
|
|
}
|
|
}
|
|
|
|
/// Run the peephole folds over every instruction in \p MBB.
///
/// \returns true if at least one instruction was folded.
bool AArch64PostSelectOptimize::doPeepholeOpts(MachineBasicBlock &MBB) {
  bool AnyFolded = false;
  // A fold may erase the instruction being visited, so the iterator is
  // advanced before the loop body runs.
  for (auto &Inst : make_early_inc_range(MBB)) {
    if (foldSimpleCrossClassCopies(Inst))
      AnyFolded = true;
  }
  return AnyFolded;
}
|
|
|
|
/// Try to eliminate a COPY between two virtual registers whose register
/// classes differ but where one class is a sub-class of the other.
///
/// On success every use of the copy's destination is rewritten to the copy's
/// source and \p MI is erased.
///
/// \returns true if \p MI was erased, false if it was left untouched.
bool AArch64PostSelectOptimize::foldSimpleCrossClassCopies(MachineInstr &MI) {
  auto &MRI = MI.getMF()->getRegInfo();

  if (!MI.isCopy())
    return false;

  // Copies that read a sub-register are not handled.
  const auto &SrcOp = MI.getOperand(1);
  if (SrcOp.getSubReg())
    return false;

  const Register Src = SrcOp.getReg();
  const Register Dst = MI.getOperand(0).getReg();

  // A physical register on either side is not handled.
  if (Src.isPhysical() || Dst.isPhysical())
    return false;

  const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
  const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);

  // Same-class copies are not this fold's business.
  if (SrcRC == DstRC)
    return false;

  const bool SrcIsSuperClass = SrcRC->hasSubClass(DstRC);

  // The classes must be related in one direction or the other.
  if (!SrcIsSuperClass && !DstRC->hasSubClass(SrcRC))
    return false;

  if (SrcIsSuperClass) {
    // The source class is the wider one. Narrow the source register to the
    // destination class, but only when this copy is its sole (non-debug) use.
    if (!MRI.hasOneNonDBGUse(Src))
      return false;

    // Refuse to constrain to an unusual class that only has a few registers.
    if (!MRI.constrainRegClass(Src, DstRC, /* MinNumRegs */ 25))
      return false;
  }
  // Otherwise the destination class is the wider one: no register class is
  // constrained here, the destination's uses simply switch to the source.

  MRI.replaceRegWith(Dst, Src);
  MI.eraseFromParent();
  return true;
}
|
|
|
|
/// Clean up definitions of NZCV in \p MBB whose value is never read.
///
/// The block is scanned bottom-up while tracking register liveness. For every
/// instruction that defines NZCV at a point where NZCV is not live:
///  - if getNonFlagSettingVariant() knows a non-flag-setting opcode, the
///    instruction is rewritten to it and the NZCV def operand is removed;
///  - otherwise the NZCV def operand is flagged dead.
///
/// \returns true if any instruction in \p MBB was modified, including the case
/// where only a dead flag was added.
bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
  // If we find a dead NZCV implicit-def, we
  // - try to convert the operation to a non-flag-setting equivalent
  // - or mark the def as dead to aid later peephole optimizations.

  // Use cases:
  // 1)
  // Consider the following code:
  //  FCMPSrr %0, %1, implicit-def $nzcv
  //  %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
  //  %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
  //  FCMPSrr %0, %1, implicit-def $nzcv
  //  %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
  // This kind of code where we have 2 FCMPs each feeding a CSEL can happen
  // when we have a single IR fcmp being used by two selects. During selection,
  // to ensure that there can be no clobbering of nzcv between the fcmp and the
  // csel, we have to generate an fcmp immediately before each csel is
  // selected.
  // However, often we can essentially CSE these together later in MachineCSE.
  // This doesn't work though if there are unrelated flag-setting instructions
  // in between the two FCMPs. In this case, the SUBS defines NZCV
  // but it doesn't have any users, being overwritten by the second FCMP.
  //
  // 2)
  // The instruction selector always emits the flag-setting variant of ADC/SBC
  // while selecting G_UADDE/G_SADDE/G_USUBE/G_SSUBE. If the carry-out of these
  // instructions is never used, we can switch to the non-flag-setting variant.

  bool Changed = false;
  auto &MF = *MBB.getParent();
  auto &Subtarget = MF.getSubtarget();
  const auto *TII = Subtarget.getInstrInfo();
  const auto *TRI = Subtarget.getRegisterInfo();
  const auto *RBI = Subtarget.getRegBankInfo();
  auto &MRI = MF.getRegInfo();

  // Start from the block's live-outs and walk towards the top.
  LiveRegUnits LRU(*TRI);
  LRU.addLiveOuts(MBB);

  for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
    // Sample liveness *below* II before stepping over it: this tells us
    // whether anything after II still reads the flags.
    const bool NZCVDead = LRU.available(AArch64::NZCV);

    // Index of II's NZCV def operand, or -1 if NZCV is live or II does not
    // define it.
    const int DeadNZCVIdx =
        NZCVDead ? II.findRegisterDefOperandIdx(AArch64::NZCV) : -1;

    if (DeadNZCVIdx != -1) {
      // The instruction defines NZCV, but NZCV is dead.
      if (unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode())) {
        // If there is an equivalent non-flag-setting op, we convert.
        LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
                             "op: "
                          << II);
        II.setDesc(TII->get(NewOpc));
        II.removeOperand(DeadNZCVIdx);
        // Changing the opcode can result in differing regclass requirements,
        // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp.
        // Constrain the regclasses, possibly introducing a copy.
        constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(),
                                 II.getOperand(0), 0);
        Changed = true;
      } else {
        // Otherwise, we just set the nzcv imp-def operand to be dead, so the
        // peephole optimizations can optimize them further. This still
        // modifies the instruction, so it must be reported to the caller.
        auto &NZCVDef = II.getOperand(DeadNZCVIdx);
        if (!NZCVDef.isDead()) {
          NZCVDef.setIsDead();
          Changed = true;
        }
      }
    }

    LRU.stepBackward(II);
  }
  return Changed;
}
|
|
|
|
/// Pass entry point: run the NZCV cleanup and then the peephole folds on
/// every basic block of \p MF.
///
/// Functions flagged as having failed instruction selection are skipped.
///
/// \returns true if any block reported a change.
bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
  const auto &Props = MF.getProperties();

  if (Props.hasProperty(MachineFunctionProperties::Property::FailedISel))
    return false;

  assert(Props.hasProperty(MachineFunctionProperties::Property::Selected) &&
         "Expected a selected MF");

  bool Modified = false;
  for (auto &BB : MF) {
    // Both steps always run, in this order, for every block.
    const bool FlagsChanged = optimizeNZCVDefs(BB);
    const bool CopiesChanged = doPeepholeOpts(BB);
    Modified = Modified || FlagsChanged || CopiesChanged;
  }
  return Modified;
}
|
|
|
|
char AArch64PostSelectOptimize::ID = 0;
|
|
INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE,
|
|
"Optimize AArch64 selected instructions",
|
|
false, false)
|
|
INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE,
|
|
"Optimize AArch64 selected instructions", false,
|
|
false)
|
|
|
|
namespace llvm {
|
|
FunctionPass *createAArch64PostSelectOptimize() {
|
|
return new AArch64PostSelectOptimize();
|
|
}
|
|
} // end namespace llvm
|