789 lines
31 KiB
C++
789 lines
31 KiB
C++
//===- PPCInstructionSelector.cpp --------------------------------*- C++ -*-==//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
/// \file
|
|
/// This file implements the targeting of the InstructionSelector class for
|
|
/// PowerPC.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "PPC.h"
|
|
#include "PPCInstrInfo.h"
|
|
#include "PPCMachineFunctionInfo.h"
|
|
#include "PPCRegisterBankInfo.h"
|
|
#include "PPCSubtarget.h"
|
|
#include "PPCTargetMachine.h"
|
|
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
|
|
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
|
|
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
|
|
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
|
|
#include "llvm/CodeGen/MachineConstantPool.h"
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
#include "llvm/IR/IntrinsicsPowerPC.h"
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#define DEBUG_TYPE "ppc-gisel"
|
|
|
|
using namespace llvm;
|
|
|
|
namespace {
|
|
|
|
#define GET_GLOBALISEL_PREDICATE_BITSET
|
|
#include "PPCGenGlobalISel.inc"
|
|
#undef GET_GLOBALISEL_PREDICATE_BITSET
|
|
|
|
class PPCInstructionSelector : public InstructionSelector {
|
|
public:
|
|
PPCInstructionSelector(const PPCTargetMachine &TM, const PPCSubtarget &STI,
|
|
const PPCRegisterBankInfo &RBI);
|
|
|
|
bool select(MachineInstr &I) override;
|
|
static const char *getName() { return DEBUG_TYPE; }
|
|
|
|
private:
|
|
/// tblgen generated 'select' implementation that is used as the initial
|
|
/// selector for the patterns that do not require complex C++.
|
|
bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
|
|
|
|
bool selectFPToInt(MachineInstr &I, MachineBasicBlock &MBB,
|
|
MachineRegisterInfo &MRI) const;
|
|
bool selectIntToFP(MachineInstr &I, MachineBasicBlock &MBB,
|
|
MachineRegisterInfo &MRI) const;
|
|
|
|
bool selectZExt(MachineInstr &I, MachineBasicBlock &MBB,
|
|
MachineRegisterInfo &MRI) const;
|
|
bool selectConstantPool(MachineInstr &I, MachineBasicBlock &MBB,
|
|
MachineRegisterInfo &MRI) const;
|
|
|
|
std::optional<bool> selectI64ImmDirect(MachineInstr &I,
|
|
MachineBasicBlock &MBB,
|
|
MachineRegisterInfo &MRI, Register Reg,
|
|
uint64_t Imm) const;
|
|
bool selectI64Imm(MachineInstr &I, MachineBasicBlock &MBB,
|
|
MachineRegisterInfo &MRI) const;
|
|
|
|
const PPCTargetMachine &TM;
|
|
const PPCSubtarget &STI;
|
|
const PPCInstrInfo &TII;
|
|
const PPCRegisterInfo &TRI;
|
|
const PPCRegisterBankInfo &RBI;
|
|
|
|
#define GET_GLOBALISEL_PREDICATES_DECL
|
|
#include "PPCGenGlobalISel.inc"
|
|
#undef GET_GLOBALISEL_PREDICATES_DECL
|
|
|
|
#define GET_GLOBALISEL_TEMPORARIES_DECL
|
|
#include "PPCGenGlobalISel.inc"
|
|
#undef GET_GLOBALISEL_TEMPORARIES_DECL
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
#define GET_GLOBALISEL_IMPL
|
|
#include "PPCGenGlobalISel.inc"
|
|
#undef GET_GLOBALISEL_IMPL
|
|
|
|
PPCInstructionSelector::PPCInstructionSelector(const PPCTargetMachine &TM,
|
|
const PPCSubtarget &STI,
|
|
const PPCRegisterBankInfo &RBI)
|
|
: TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
|
|
RBI(RBI),
|
|
#define GET_GLOBALISEL_PREDICATES_INIT
|
|
#include "PPCGenGlobalISel.inc"
|
|
#undef GET_GLOBALISEL_PREDICATES_INIT
|
|
#define GET_GLOBALISEL_TEMPORARIES_INIT
|
|
#include "PPCGenGlobalISel.inc"
|
|
#undef GET_GLOBALISEL_TEMPORARIES_INIT
|
|
{
|
|
}
|
|
|
|
static const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank *RB) {
|
|
if (RB->getID() == PPC::GPRRegBankID) {
|
|
if (Ty.getSizeInBits() == 64)
|
|
return &PPC::G8RCRegClass;
|
|
if (Ty.getSizeInBits() <= 32)
|
|
return &PPC::GPRCRegClass;
|
|
}
|
|
if (RB->getID() == PPC::FPRRegBankID) {
|
|
if (Ty.getSizeInBits() == 32)
|
|
return &PPC::F4RCRegClass;
|
|
if (Ty.getSizeInBits() == 64)
|
|
return &PPC::F8RCRegClass;
|
|
}
|
|
if (RB->getID() == PPC::VECRegBankID) {
|
|
if (Ty.getSizeInBits() == 128)
|
|
return &PPC::VSRCRegClass;
|
|
}
|
|
if (RB->getID() == PPC::CRRegBankID) {
|
|
if (Ty.getSizeInBits() == 1)
|
|
return &PPC::CRBITRCRegClass;
|
|
if (Ty.getSizeInBits() == 4)
|
|
return &PPC::CRRCRegClass;
|
|
}
|
|
|
|
llvm_unreachable("Unknown RegBank!");
|
|
}
|
|
|
|
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
|
|
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
|
|
const RegisterBankInfo &RBI) {
|
|
Register DstReg = I.getOperand(0).getReg();
|
|
|
|
if (DstReg.isPhysical())
|
|
return true;
|
|
|
|
const RegisterBank *DstRegBank = RBI.getRegBank(DstReg, MRI, TRI);
|
|
const TargetRegisterClass *DstRC =
|
|
getRegClass(MRI.getType(DstReg), DstRegBank);
|
|
|
|
// No need to constrain SrcReg. It will get constrained when we hit another of
|
|
// its use or its defs.
|
|
// Copies do not have constraints.
|
|
if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
|
|
LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
|
|
<< " operand\n");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static unsigned selectLoadStoreOp(unsigned GenericOpc, unsigned RegBankID,
|
|
unsigned OpSize) {
|
|
const bool IsStore = GenericOpc == TargetOpcode::G_STORE;
|
|
switch (RegBankID) {
|
|
case PPC::GPRRegBankID:
|
|
switch (OpSize) {
|
|
case 32:
|
|
return IsStore ? PPC::STW : PPC::LWZ;
|
|
case 64:
|
|
return IsStore ? PPC::STD : PPC::LD;
|
|
default:
|
|
llvm_unreachable("Unexpected size!");
|
|
}
|
|
break;
|
|
case PPC::FPRRegBankID:
|
|
switch (OpSize) {
|
|
case 32:
|
|
return IsStore ? PPC::STFS : PPC::LFS;
|
|
case 64:
|
|
return IsStore ? PPC::STFD : PPC::LFD;
|
|
default:
|
|
llvm_unreachable("Unexpected size!");
|
|
}
|
|
break;
|
|
default:
|
|
llvm_unreachable("Unexpected register bank!");
|
|
}
|
|
return GenericOpc;
|
|
}
|
|
|
|
bool PPCInstructionSelector::selectIntToFP(MachineInstr &I,
|
|
MachineBasicBlock &MBB,
|
|
MachineRegisterInfo &MRI) const {
|
|
if (!STI.hasDirectMove() || !STI.isPPC64() || !STI.hasFPCVT())
|
|
return false;
|
|
|
|
const DebugLoc &DbgLoc = I.getDebugLoc();
|
|
const Register DstReg = I.getOperand(0).getReg();
|
|
const Register SrcReg = I.getOperand(1).getReg();
|
|
|
|
Register MoveReg = MRI.createVirtualRegister(&PPC::VSFRCRegClass);
|
|
|
|
// For now, only handle the case for 64 bit integer.
|
|
BuildMI(MBB, I, DbgLoc, TII.get(PPC::MTVSRD), MoveReg).addReg(SrcReg);
|
|
|
|
bool IsSingle = MRI.getType(DstReg).getSizeInBits() == 32;
|
|
bool IsSigned = I.getOpcode() == TargetOpcode::G_SITOFP;
|
|
unsigned ConvOp = IsSingle ? (IsSigned ? PPC::XSCVSXDSP : PPC::XSCVUXDSP)
|
|
: (IsSigned ? PPC::XSCVSXDDP : PPC::XSCVUXDDP);
|
|
|
|
MachineInstr *MI =
|
|
BuildMI(MBB, I, DbgLoc, TII.get(ConvOp), DstReg).addReg(MoveReg);
|
|
|
|
I.eraseFromParent();
|
|
return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
|
|
}
|
|
|
|
bool PPCInstructionSelector::selectFPToInt(MachineInstr &I,
|
|
MachineBasicBlock &MBB,
|
|
MachineRegisterInfo &MRI) const {
|
|
if (!STI.hasDirectMove() || !STI.isPPC64() || !STI.hasFPCVT())
|
|
return false;
|
|
|
|
const DebugLoc &DbgLoc = I.getDebugLoc();
|
|
const Register DstReg = I.getOperand(0).getReg();
|
|
const Register SrcReg = I.getOperand(1).getReg();
|
|
|
|
Register CopyReg = MRI.createVirtualRegister(&PPC::VSFRCRegClass);
|
|
BuildMI(MBB, I, DbgLoc, TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
|
|
|
|
Register ConvReg = MRI.createVirtualRegister(&PPC::VSFRCRegClass);
|
|
|
|
bool IsSigned = I.getOpcode() == TargetOpcode::G_FPTOSI;
|
|
|
|
// single-precision is stored as double-precision on PPC in registers, so
|
|
// always use double-precision convertions.
|
|
unsigned ConvOp = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
|
|
|
|
BuildMI(MBB, I, DbgLoc, TII.get(ConvOp), ConvReg).addReg(CopyReg);
|
|
|
|
MachineInstr *MI =
|
|
BuildMI(MBB, I, DbgLoc, TII.get(PPC::MFVSRD), DstReg).addReg(ConvReg);
|
|
|
|
I.eraseFromParent();
|
|
return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
|
|
}
|
|
|
|
bool PPCInstructionSelector::selectZExt(MachineInstr &I, MachineBasicBlock &MBB,
|
|
MachineRegisterInfo &MRI) const {
|
|
const Register DstReg = I.getOperand(0).getReg();
|
|
const LLT DstTy = MRI.getType(DstReg);
|
|
const RegisterBank *DstRegBank = RBI.getRegBank(DstReg, MRI, TRI);
|
|
|
|
const Register SrcReg = I.getOperand(1).getReg();
|
|
|
|
assert(DstTy.getSizeInBits() == 64 && "Unexpected dest size!");
|
|
assert(MRI.getType(SrcReg).getSizeInBits() == 32 && "Unexpected src size!");
|
|
|
|
Register ImpDefReg =
|
|
MRI.createVirtualRegister(getRegClass(DstTy, DstRegBank));
|
|
BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
|
|
ImpDefReg);
|
|
|
|
Register NewDefReg =
|
|
MRI.createVirtualRegister(getRegClass(DstTy, DstRegBank));
|
|
BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::INSERT_SUBREG),
|
|
NewDefReg)
|
|
.addReg(ImpDefReg)
|
|
.addReg(SrcReg)
|
|
.addImm(PPC::sub_32);
|
|
|
|
MachineInstr *MI =
|
|
BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDICL), DstReg)
|
|
.addReg(NewDefReg)
|
|
.addImm(0)
|
|
.addImm(32);
|
|
|
|
I.eraseFromParent();
|
|
return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
|
|
}
|
|
|
|
// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
|
|
// zeros and return the number of bits by the left of these consecutive zeros.
|
|
static uint32_t findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
|
|
uint32_t HiTZ = llvm::countr_zero<uint32_t>(Hi_32(Imm));
|
|
uint32_t LoLZ = llvm::countl_zero<uint32_t>(Lo_32(Imm));
|
|
if ((HiTZ + LoLZ) >= Num)
|
|
return (32 + HiTZ);
|
|
return 0;
|
|
}
|
|
|
|
// Direct materialization of 64-bit constants by enumerated patterns.
|
|
// Similar to PPCISelDAGToDAG::selectI64ImmDirect().
|
|
std::optional<bool> PPCInstructionSelector::selectI64ImmDirect(MachineInstr &I,
|
|
MachineBasicBlock &MBB,
|
|
MachineRegisterInfo &MRI,
|
|
Register Reg,
|
|
uint64_t Imm) const {
|
|
unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
|
|
unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
|
|
unsigned TO = llvm::countr_one<uint64_t>(Imm);
|
|
unsigned LO = llvm::countl_one<uint64_t>(Imm);
|
|
uint32_t Hi32 = Hi_32(Imm);
|
|
uint32_t Lo32 = Lo_32(Imm);
|
|
uint32_t Shift = 0;
|
|
|
|
// Following patterns use 1 instructions to materialize the Imm.
|
|
|
|
// 1-1) Patterns : {zeros}{15-bit valve}
|
|
// {ones}{15-bit valve}
|
|
if (isInt<16>(Imm))
|
|
return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LI8), Reg)
|
|
.addImm(Imm)
|
|
.constrainAllUses(TII, TRI, RBI);
|
|
// 1-2) Patterns : {zeros}{15-bit valve}{16 zeros}
|
|
// {ones}{15-bit valve}{16 zeros}
|
|
if (TZ > 15 && (LZ > 32 || LO > 32))
|
|
return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LIS8), Reg)
|
|
.addImm((Imm >> 16) & 0xffff)
|
|
.constrainAllUses(TII, TRI, RBI);
|
|
|
|
// Following patterns use 2 instructions to materialize the Imm.
|
|
|
|
assert(LZ < 64 && "Unexpected leading zeros here.");
|
|
// Count of ones follwing the leading zeros.
|
|
unsigned FO = llvm::countl_one<uint64_t>(Imm << LZ);
|
|
// 2-1) Patterns : {zeros}{31-bit value}
|
|
// {ones}{31-bit value}
|
|
if (isInt<32>(Imm)) {
|
|
uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
|
|
unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
|
|
Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode), TmpReg)
|
|
.addImm((Imm >> 16) & 0xffff)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORI8), Reg)
|
|
.addReg(TmpReg, RegState::Kill)
|
|
.addImm(Imm & 0xffff)
|
|
.constrainAllUses(TII, TRI, RBI);
|
|
}
|
|
// 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
|
|
// {zeros}{15-bit value}{zeros}
|
|
// {zeros}{ones}{15-bit value}
|
|
// {ones}{15-bit value}{zeros}
|
|
// We can take advantage of LI's sign-extension semantics to generate leading
|
|
// ones, and then use RLDIC to mask off the ones in both sides after rotation.
|
|
if ((LZ + FO + TZ) > 48) {
|
|
Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LI8), TmpReg)
|
|
.addImm((Imm >> TZ) & 0xffff)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDIC), Reg)
|
|
.addReg(TmpReg, RegState::Kill)
|
|
.addImm(TZ)
|
|
.addImm(LZ)
|
|
.constrainAllUses(TII, TRI, RBI);
|
|
}
|
|
// 2-3) Pattern : {zeros}{15-bit value}{ones}
|
|
// Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value,
|
|
// therefore we can take advantage of LI's sign-extension semantics, and then
|
|
// mask them off after rotation.
|
|
//
|
|
// +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
|
|
// |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
|
|
// +------------------------+ +------------------------+
|
|
// 63 0 63 0
|
|
// Imm (Imm >> (48 - LZ) & 0xffff)
|
|
// +----sext-----|--16-bit--+ +clear-|-----------------+
|
|
// |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
|
|
// +------------------------+ +------------------------+
|
|
// 63 0 63 0
|
|
// LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
|
|
if ((LZ + TO) > 48) {
|
|
// Since the immediates with (LZ > 32) have been handled by previous
|
|
// patterns, here we have (LZ <= 32) to make sure we will not shift right
|
|
// the Imm by a negative value.
|
|
assert(LZ <= 32 && "Unexpected shift value.");
|
|
Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LI8), TmpReg)
|
|
.addImm(Imm >> (48 - LZ) & 0xffff)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDICL), Reg)
|
|
.addReg(TmpReg, RegState::Kill)
|
|
.addImm(48 - LZ)
|
|
.addImm(LZ)
|
|
.constrainAllUses(TII, TRI, RBI);
|
|
}
|
|
// 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
|
|
// {ones}{15-bit value}{ones}
|
|
// We can take advantage of LI's sign-extension semantics to generate leading
|
|
// ones, and then use RLDICL to mask off the ones in left sides (if required)
|
|
// after rotation.
|
|
//
|
|
// +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+
|
|
// |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|
|
|
// +------------------------+ +------------------------+
|
|
// 63 0 63 0
|
|
// Imm (Imm >> TO) & 0xffff
|
|
// +----sext-----|--16-bit--+ +LZ|---------------------+
|
|
// |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|
|
|
// +------------------------+ +------------------------+
|
|
// 63 0 63 0
|
|
// LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
|
|
if ((LZ + FO + TO) > 48) {
|
|
Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LI8), TmpReg)
|
|
.addImm((Imm >> TO) & 0xffff)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDICL), Reg)
|
|
.addReg(TmpReg, RegState::Kill)
|
|
.addImm(TO)
|
|
.addImm(LZ)
|
|
.constrainAllUses(TII, TRI, RBI);
|
|
}
|
|
// 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
|
|
// If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
|
|
// value, we can use LI for Lo16 without generating leading ones then add the
|
|
// Hi16(in Lo32).
|
|
if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
|
|
Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LI8), TmpReg)
|
|
.addImm(Lo32 & 0xffff)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORIS8), Reg)
|
|
.addReg(TmpReg, RegState::Kill)
|
|
.addImm(Lo32 >> 16)
|
|
.constrainAllUses(TII, TRI, RBI);
|
|
}
|
|
// 2-6) Patterns : {******}{49 zeros}{******}
|
|
// {******}{49 ones}{******}
|
|
// If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
|
|
// bits remain on both sides. Rotate right the Imm to construct an int<16>
|
|
// value, use LI for int<16> value and then use RLDICL without mask to rotate
|
|
// it back.
|
|
//
|
|
// 1) findContiguousZerosAtLeast(Imm, 49)
|
|
// +------|--zeros-|------+ +---ones--||---15 bit--+
|
|
// |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|
|
|
// +----------------------+ +----------------------+
|
|
// 63 0 63 0
|
|
//
|
|
// 2) findContiguousZerosAtLeast(~Imm, 49)
|
|
// +------|--ones--|------+ +---ones--||---15 bit--+
|
|
// |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
|
|
// +----------------------+ +----------------------+
|
|
// 63 0 63 0
|
|
if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
|
|
(Shift = findContiguousZerosAtLeast(~Imm, 49))) {
|
|
uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
|
|
Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LI8), TmpReg)
|
|
.addImm(RotImm & 0xffff)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDICL), Reg)
|
|
.addReg(TmpReg, RegState::Kill)
|
|
.addImm(Shift)
|
|
.addImm(0)
|
|
.constrainAllUses(TII, TRI, RBI);
|
|
}
|
|
|
|
// Following patterns use 3 instructions to materialize the Imm.
|
|
|
|
// 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
|
|
// {zeros}{31-bit value}{zeros}
|
|
// {zeros}{ones}{31-bit value}
|
|
// {ones}{31-bit value}{zeros}
|
|
// We can take advantage of LIS's sign-extension semantics to generate leading
|
|
// ones, add the remaining bits with ORI, and then use RLDIC to mask off the
|
|
// ones in both sides after rotation.
|
|
if ((LZ + FO + TZ) > 32) {
|
|
uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
|
|
unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
|
|
Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
Register Tmp2Reg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode), TmpReg)
|
|
.addImm(ImmHi16)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORI8), Tmp2Reg)
|
|
.addReg(TmpReg, RegState::Kill)
|
|
.addImm((Imm >> TZ) & 0xffff)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDIC), Reg)
|
|
.addReg(Tmp2Reg, RegState::Kill)
|
|
.addImm(TZ)
|
|
.addImm(LZ)
|
|
.constrainAllUses(TII, TRI, RBI);
|
|
}
|
|
// 3-2) Pattern : {zeros}{31-bit value}{ones}
|
|
// Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits
|
|
// value, therefore we can take advantage of LIS's sign-extension semantics,
|
|
// add the remaining bits with ORI, and then mask them off after rotation.
|
|
// This is similar to Pattern 2-3, please refer to the diagram there.
|
|
if ((LZ + TO) > 32) {
|
|
// Since the immediates with (LZ > 32) have been handled by previous
|
|
// patterns, here we have (LZ <= 32) to make sure we will not shift right
|
|
// the Imm by a negative value.
|
|
assert(LZ <= 32 && "Unexpected shift value.");
|
|
Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
Register Tmp2Reg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LIS8), TmpReg)
|
|
.addImm((Imm >> (48 - LZ)) & 0xffff)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORI8), Tmp2Reg)
|
|
.addReg(TmpReg, RegState::Kill)
|
|
.addImm((Imm >> (32 - LZ)) & 0xffff)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDICL), Reg)
|
|
.addReg(Tmp2Reg, RegState::Kill)
|
|
.addImm(32 - LZ)
|
|
.addImm(LZ)
|
|
.constrainAllUses(TII, TRI, RBI);
|
|
}
|
|
// 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
|
|
// {ones}{31-bit value}{ones}
|
|
// We can take advantage of LIS's sign-extension semantics to generate leading
|
|
// ones, add the remaining bits with ORI, and then use RLDICL to mask off the
|
|
// ones in left sides (if required) after rotation.
|
|
// This is similar to Pattern 2-4, please refer to the diagram there.
|
|
if ((LZ + FO + TO) > 32) {
|
|
Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
Register Tmp2Reg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::LIS8), TmpReg)
|
|
.addImm((Imm >> (TO + 16)) & 0xffff)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORI8), Tmp2Reg)
|
|
.addReg(TmpReg, RegState::Kill)
|
|
.addImm((Imm >> TO) & 0xffff)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDICL), Reg)
|
|
.addReg(Tmp2Reg, RegState::Kill)
|
|
.addImm(TO)
|
|
.addImm(LZ)
|
|
.constrainAllUses(TII, TRI, RBI);
|
|
}
|
|
// 3-4) Patterns : High word == Low word
|
|
if (Hi32 == Lo32) {
|
|
// Handle the first 32 bits.
|
|
uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
|
|
unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
|
|
Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
Register Tmp2Reg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode), TmpReg)
|
|
.addImm(ImmHi16)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORI8), Tmp2Reg)
|
|
.addReg(TmpReg, RegState::Kill)
|
|
.addImm(Lo32 & 0xffff)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDIMI), Reg)
|
|
.addReg(Tmp2Reg)
|
|
.addReg(Tmp2Reg, RegState::Kill)
|
|
.addImm(32)
|
|
.addImm(0)
|
|
.constrainAllUses(TII, TRI, RBI);
|
|
}
|
|
// 3-5) Patterns : {******}{33 zeros}{******}
|
|
// {******}{33 ones}{******}
|
|
// If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
|
|
// bits remain on both sides. Rotate right the Imm to construct an int<32>
|
|
// value, use LIS + ORI for int<32> value and then use RLDICL without mask to
|
|
// rotate it back.
|
|
// This is similar to Pattern 2-6, please refer to the diagram there.
|
|
if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
|
|
(Shift = findContiguousZerosAtLeast(~Imm, 33))) {
|
|
uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
|
|
uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
|
|
unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
|
|
Register TmpReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
Register Tmp2Reg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode), TmpReg)
|
|
.addImm(ImmHi16)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORI8), Tmp2Reg)
|
|
.addReg(TmpReg, RegState::Kill)
|
|
.addImm(RotImm & 0xffff)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
return BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::RLDICL), Reg)
|
|
.addReg(Tmp2Reg, RegState::Kill)
|
|
.addImm(Shift)
|
|
.addImm(0)
|
|
.constrainAllUses(TII, TRI, RBI);
|
|
}
|
|
|
|
// If we end up here then no instructions were inserted.
|
|
return std::nullopt;
|
|
}
|
|
|
|
// Derived from PPCISelDAGToDAG::selectI64Imm().
|
|
// TODO: Add support for prefixed instructions.
|
|
bool PPCInstructionSelector::selectI64Imm(MachineInstr &I,
|
|
MachineBasicBlock &MBB,
|
|
MachineRegisterInfo &MRI) const {
|
|
assert(I.getOpcode() == TargetOpcode::G_CONSTANT && "Unexpected G code");
|
|
|
|
Register DstReg = I.getOperand(0).getReg();
|
|
int64_t Imm = I.getOperand(1).getCImm()->getValue().getZExtValue();
|
|
// No more than 3 instructions are used if we can select the i64 immediate
|
|
// directly.
|
|
if (std::optional<bool> Res = selectI64ImmDirect(I, MBB, MRI, DstReg, Imm)) {
|
|
I.eraseFromParent();
|
|
return *Res;
|
|
}
|
|
|
|
// Calculate the last bits as required.
|
|
uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff;
|
|
uint32_t Lo16 = Lo_32(Imm) & 0xffff;
|
|
|
|
Register Reg =
|
|
(Hi16 || Lo16) ? MRI.createVirtualRegister(&PPC::G8RCRegClass) : DstReg;
|
|
|
|
// Handle the upper 32 bit value.
|
|
std::optional<bool> Res =
|
|
selectI64ImmDirect(I, MBB, MRI, Reg, Imm & 0xffffffff00000000);
|
|
if (!Res || !*Res)
|
|
return false;
|
|
|
|
// Add in the last bits as required.
|
|
if (Hi16) {
|
|
Register TmpReg =
|
|
Lo16 ? MRI.createVirtualRegister(&PPC::G8RCRegClass) : DstReg;
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORIS8), TmpReg)
|
|
.addReg(Reg, RegState::Kill)
|
|
.addImm(Hi16)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
Reg = TmpReg;
|
|
}
|
|
if (Lo16) {
|
|
if (!BuildMI(MBB, I, I.getDebugLoc(), TII.get(PPC::ORI8), DstReg)
|
|
.addReg(Reg, RegState::Kill)
|
|
.addImm(Lo16)
|
|
.constrainAllUses(TII, TRI, RBI))
|
|
return false;
|
|
}
|
|
I.eraseFromParent();
|
|
return true;
|
|
}
|
|
|
|
bool PPCInstructionSelector::selectConstantPool(
|
|
MachineInstr &I, MachineBasicBlock &MBB, MachineRegisterInfo &MRI) const {
|
|
const DebugLoc &DbgLoc = I.getDebugLoc();
|
|
MachineFunction *MF = MBB.getParent();
|
|
|
|
// TODO: handle 32-bit.
|
|
// TODO: Enabling floating point constant pool selection on AIX requires
|
|
// global isel on big endian target enabled first.
|
|
// See CallLowering::enableBigEndian().
|
|
if (!STI.isPPC64() || !STI.isLittleEndian())
|
|
return false;
|
|
|
|
MF->getInfo<PPCFunctionInfo>()->setUsesTOCBasePtr();
|
|
|
|
const Register DstReg = I.getOperand(0).getReg();
|
|
unsigned CPI = I.getOperand(1).getIndex();
|
|
|
|
// Address stored in the TOC entry. This is related to code model and the ABI
|
|
// we are currently using. For now we only handle 64-bit Linux LE. PowerPC
|
|
// only supports small, medium and large code model.
|
|
const CodeModel::Model CModel = TM.getCodeModel();
|
|
assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
|
|
"PowerPC doesn't support tiny or kernel code models.");
|
|
|
|
const MCRegister TOCReg = STI.getTOCPointerRegister();
|
|
MachineMemOperand *MMO = MF->getMachineMemOperand(
|
|
MachinePointerInfo::getGOT(*MF), MachineMemOperand::MOLoad,
|
|
MRI.getType(DstReg), MF->getDataLayout().getPointerABIAlignment(0));
|
|
|
|
MachineInstr *MI = nullptr;
|
|
// For now we only handle 64-bit Linux.
|
|
if (CModel == CodeModel::Small) {
|
|
// For small code model, generate LDtocCPT(CPI, X2).
|
|
MI = BuildMI(MBB, I, DbgLoc, TII.get(PPC::LDtocCPT), DstReg)
|
|
.addConstantPoolIndex(CPI)
|
|
.addReg(TOCReg)
|
|
.addMemOperand(MMO);
|
|
} else {
|
|
Register HaAddrReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
|
|
BuildMI(MBB, I, DbgLoc, TII.get(PPC::ADDIStocHA8), HaAddrReg)
|
|
.addReg(TOCReg)
|
|
.addConstantPoolIndex(CPI);
|
|
|
|
if (CModel == CodeModel::Large)
|
|
// For large code model, generate LDtocL(CPI, ADDIStocHA8(X2, CPI))
|
|
MI = BuildMI(MBB, I, DbgLoc, TII.get(PPC::LDtocL), DstReg)
|
|
.addConstantPoolIndex(CPI)
|
|
.addReg(HaAddrReg)
|
|
.addMemOperand(MMO);
|
|
else
|
|
// For medium code model, generate ADDItocL(CPI, ADDIStocHA8(X2, CPI))
|
|
MI = BuildMI(MBB, I, DbgLoc, TII.get(PPC::ADDItocL), DstReg)
|
|
.addReg(HaAddrReg)
|
|
.addConstantPoolIndex(CPI);
|
|
}
|
|
|
|
I.eraseFromParent();
|
|
return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
|
|
}
|
|
|
|
bool PPCInstructionSelector::select(MachineInstr &I) {
|
|
auto &MBB = *I.getParent();
|
|
auto &MF = *MBB.getParent();
|
|
auto &MRI = MF.getRegInfo();
|
|
|
|
if (!isPreISelGenericOpcode(I.getOpcode())) {
|
|
if (I.isCopy())
|
|
return selectCopy(I, TII, MRI, TRI, RBI);
|
|
|
|
return true;
|
|
}
|
|
|
|
if (selectImpl(I, *CoverageInfo))
|
|
return true;
|
|
|
|
unsigned Opcode = I.getOpcode();
|
|
|
|
switch (Opcode) {
|
|
default:
|
|
return false;
|
|
case TargetOpcode::G_LOAD:
|
|
case TargetOpcode::G_STORE: {
|
|
GLoadStore &LdSt = cast<GLoadStore>(I);
|
|
LLT PtrTy = MRI.getType(LdSt.getPointerReg());
|
|
|
|
if (PtrTy != LLT::pointer(0, 64)) {
|
|
LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
|
|
<< ", expected: " << LLT::pointer(0, 64) << '\n');
|
|
return false;
|
|
}
|
|
|
|
auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
|
|
const unsigned NewOpc = selectLoadStoreOp(
|
|
I.getOpcode(), RBI.getRegBank(LdSt.getReg(0), MRI, TRI)->getID(),
|
|
LdSt.getMemSizeInBits());
|
|
|
|
if (NewOpc == I.getOpcode())
|
|
return nullptr;
|
|
|
|
// For now, simply use DForm with load/store addr as base and 0 as imm.
|
|
// FIXME: optimize load/store with some specific address patterns.
|
|
I.setDesc(TII.get(NewOpc));
|
|
Register AddrReg = I.getOperand(1).getReg();
|
|
bool IsKill = I.getOperand(1).isKill();
|
|
I.getOperand(1).ChangeToImmediate(0);
|
|
I.addOperand(*I.getParent()->getParent(),
|
|
MachineOperand::CreateReg(AddrReg, /* isDef */ false,
|
|
/* isImp */ false, IsKill));
|
|
return &I;
|
|
};
|
|
|
|
MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
|
|
if (!LoadStore)
|
|
return false;
|
|
|
|
return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
|
|
}
|
|
case TargetOpcode::G_SITOFP:
|
|
case TargetOpcode::G_UITOFP:
|
|
return selectIntToFP(I, MBB, MRI);
|
|
case TargetOpcode::G_FPTOSI:
|
|
case TargetOpcode::G_FPTOUI:
|
|
return selectFPToInt(I, MBB, MRI);
|
|
// G_SEXT will be selected in tb-gen pattern.
|
|
case TargetOpcode::G_ZEXT:
|
|
return selectZExt(I, MBB, MRI);
|
|
case TargetOpcode::G_CONSTANT:
|
|
return selectI64Imm(I, MBB, MRI);
|
|
case TargetOpcode::G_CONSTANT_POOL:
|
|
return selectConstantPool(I, MBB, MRI);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
namespace llvm {
|
|
InstructionSelector *
|
|
createPPCInstructionSelector(const PPCTargetMachine &TM,
|
|
const PPCSubtarget &Subtarget,
|
|
const PPCRegisterBankInfo &RBI) {
|
|
return new PPCInstructionSelector(TM, Subtarget, RBI);
|
|
}
|
|
} // end namespace llvm
|