cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
-cl::opt<bool> UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
- cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden);
-cl::opt<bool> BPermRewriterNoMasking("ppc-bit-perm-rewriter-stress-rotates",
- cl::desc("stress rotate selection in aggressive ppc isel for "
- "bit permutations"), cl::Hidden);
+static cl::opt<bool>
+ UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
+ cl::desc("use aggressive ppc isel for bit permutations"),
+ cl::Hidden);
+static cl::opt<bool> BPermRewriterNoMasking(
+ "ppc-bit-perm-rewriter-stress-rotates",
+ cl::desc("stress rotate selection in aggressive ppc isel for "
+ "bit permutations"),
+ cl::Hidden);
namespace llvm {
void initializePPCDAGToDAGISelPass(PassRegistry&);
///
class PPCDAGToDAGISel : public SelectionDAGISel {
const PPCTargetMachine &TM;
- const PPCTargetLowering *PPCLowering;
const PPCSubtarget *PPCSubTarget;
+ const PPCTargetLowering *PPCLowering;
unsigned GlobalBaseReg;
public:
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
- : SelectionDAGISel(tm), TM(tm),
- PPCLowering(TM.getSubtargetImpl()->getTargetLowering()),
- PPCSubTarget(TM.getSubtargetImpl()) {
+ : SelectionDAGISel(tm), TM(tm) {
initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
- PPCLowering = TM.getSubtargetImpl()->getTargetLowering();
- PPCSubTarget = TM.getSubtargetImpl();
+ PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
+ PPCLowering = PPCSubTarget->getTargetLowering();
SelectionDAGISel::runOnMachineFunction(MF);
if (!PPCSubTarget->isSVR4ABI())
std::vector<SDValue> &OutOps) override {
// We need to make sure that this one operand does not end up in r0
// (because we might end up lowering this as 0(%op)).
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
SDValue NewOp =
void PeepholeCROps();
SDValue combineToCMPB(SDNode *N);
+ void foldBoolExts(SDValue &Res, SDNode *&N);
bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);
+
+ SDNode *transferMemOperands(SDNode *N, SDNode *Result);
};
}
unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
- const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
MachineBasicBlock &EntryBB = *Fn.begin();
DebugLoc dl;
// Emit the following code into the entry block:
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
if (!GlobalBaseReg) {
- const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = MF->front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
if (M->getPICLevel() == PICLevel::Small) {
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
} else {
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
BuildMI(FirstMBB, MBBI, dl,
- TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg)
+ TII.get(PPC::UpdateGBR), GlobalBaseReg)
.addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
}
return Result;
}
+static uint64_t Rot64(uint64_t Imm, unsigned R) {
+ return (Imm << R) | (Imm >> (64 - R));
+}
+
static unsigned SelectInt64Count(int64_t Imm) {
- unsigned DirectCount = SelectInt64CountDirect(Imm);
+ unsigned Count = SelectInt64CountDirect(Imm);
+ if (Count == 1)
+ return Count;
+
+ for (unsigned r = 1; r < 63; ++r) {
+ uint64_t RImm = Rot64(Imm, r);
+ unsigned RCount = SelectInt64CountDirect(RImm) + 1;
+ Count = std::min(Count, RCount);
+
+ // See comments in SelectInt64 for an explanation of the logic below.
+ unsigned LS = findLastSet(RImm);
+ if (LS != r-1)
+ continue;
- // If might be cheaper to materialize the bit-inverted constant, and then
- // flip the bits (which takes one nor instruction).
- unsigned NotDirectCount = SelectInt64CountDirect(~(uint64_t) Imm) + 1;
- if (NotDirectCount < DirectCount)
- return NotDirectCount;
+ uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
+ uint64_t RImmWithOnes = RImm | OnesMask;
- return DirectCount;
+ RCount = SelectInt64CountDirect(RImmWithOnes) + 1;
+ Count = std::min(Count, RCount);
+ }
+
+ return Count;
}
// Select a 64-bit constant. For cost-modeling purposes, SelectInt64Count
}
static SDNode *SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
- unsigned DirectCount = SelectInt64CountDirect(Imm);
-
- // If might be cheaper to materialize the bit-inverted constant, and then
- // flip the bits (which takes one nor instruction).
- unsigned NotDirectCount = SelectInt64CountDirect(~(uint64_t) Imm) + 1;
- if (NotDirectCount < DirectCount) {
- SDValue NotDirectVal =
- SDValue(SelectInt64Direct(CurDAG, dl, ~(uint64_t) Imm), 0);
- return CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, NotDirectVal,
- NotDirectVal);
+ unsigned Count = SelectInt64CountDirect(Imm);
+ if (Count == 1)
+ return SelectInt64Direct(CurDAG, dl, Imm);
+
+ unsigned RMin = 0;
+
+ int64_t MatImm;
+ unsigned MaskEnd;
+
+ for (unsigned r = 1; r < 63; ++r) {
+ uint64_t RImm = Rot64(Imm, r);
+ unsigned RCount = SelectInt64CountDirect(RImm) + 1;
+ if (RCount < Count) {
+ Count = RCount;
+ RMin = r;
+ MatImm = RImm;
+ MaskEnd = 63;
+ }
+
+ // If the immediate to generate has many trailing zeros, it might be
+ // worthwhile to generate a rotated value with too many leading ones
+ // (because that's free with li/lis's sign-extension semantics), and then
+ // mask them off after rotation.
+
+ unsigned LS = findLastSet(RImm);
+ // We're adding (63-LS) higher-order ones, and we expect to mask them off
+ // after performing the inverse rotation by (64-r). So we need that:
+ // 63-LS == 64-r => LS == r-1
+ if (LS != r-1)
+ continue;
+
+ uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
+ uint64_t RImmWithOnes = RImm | OnesMask;
+
+ RCount = SelectInt64CountDirect(RImmWithOnes) + 1;
+ if (RCount < Count) {
+ Count = RCount;
+ RMin = r;
+ MatImm = RImmWithOnes;
+ MaskEnd = LS;
+ }
}
- return SelectInt64Direct(CurDAG, dl, Imm);
+ if (!RMin)
+ return SelectInt64Direct(CurDAG, dl, Imm);
+
+ auto getI32Imm = [CurDAG](unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ };
+
+ SDValue Val = SDValue(SelectInt64Direct(CurDAG, dl, MatImm), 0);
+ return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
+ getI32Imm(64 - RMin), getI32Imm(MaskEnd));
}
// Select a 64-bit constant.
// Altivec Vector compare instructions do not set any CR register by default and
// vector compare operations return the same type as the operands.
if (LHS.getValueType().isVector()) {
+ if (PPCSubTarget->hasQPX())
+ return nullptr;
+
EVT VecVT = LHS.getValueType();
bool Swap, Negate;
unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
}
+SDNode *PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ return Result;
+}
+
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Offset, Base, Chain };
- return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
- PPCLowering->getPointerTy(),
- MVT::Other, Ops);
+ return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ PPCLowering->getPointerTy(),
+ MVT::Other, Ops));
} else {
unsigned Opcode;
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
+ case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
case MVT::f64: Opcode = PPC::LFDUX; break;
case MVT::f32: Opcode = PPC::LFSUX; break;
case MVT::i32: Opcode = PPC::LWZUX; break;
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Base, Offset, Chain };
- return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
- PPCLowering->getPointerTy(),
- MVT::Other, Ops);
+ return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ PPCLowering->getPointerTy(),
+ MVT::Other, Ops));
}
}
if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
isMask_64(Imm64)) {
SDValue Val = N->getOperand(0);
- MB = 64 - CountTrailingOnes_64(Imm64);
+ MB = 64 - countTrailingOnes(Imm64);
SH = 0;
// If the operand is a logical right shift, we can fold it into this
SelectCCOp = PPC::SELECT_CC_VSFRC;
else
SelectCCOp = PPC::SELECT_CC_F8;
+ else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
+ SelectCCOp = PPC::SELECT_CC_QFRC;
+ else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
+ SelectCCOp = PPC::SELECT_CC_QSRC;
+ else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
+ SelectCCOp = PPC::SELECT_CC_QBRC;
else if (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)
SelectCCOp = PPC::SELECT_CC_VSRC;
"Only supported for 64-bit ABI and 32-bit SVR4");
if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
SDValue GA = N->getOperand(0);
- return CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
- N->getOperand(1));
+ return transferMemOperands(N, CurDAG->getMachineNode(PPC::LWZtoc, dl,
+ MVT::i32, GA, N->getOperand(1)));
}
// For medium and large code model, we generate two instructions as
SDValue GA = N->getOperand(0);
SDValue TOCbase = N->getOperand(1);
SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
- TOCbase, GA);
+ TOCbase, GA);
if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
CModel == CodeModel::Large)
- return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
- SDValue(Tmp, 0));
+ return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
+ MVT::i64, GA, SDValue(Tmp, 0)));
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
const GlobalValue *GValue = G->getGlobal();
(GValue->isDeclaration() || GValue->isWeakForLinker())) ||
GValue->isDeclaration() || GValue->hasCommonLinkage() ||
GValue->hasAvailableExternallyLinkage())
- return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
- SDValue(Tmp, 0));
+ return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
+ MVT::i64, GA, SDValue(Tmp, 0)));
}
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
return Res;
}
+// When CR bit registers are enabled, an extension of an i1 variable to a i32
+// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
+// involves constant materialization of a 0 or a 1 or both. If the result of
+// the extension is then operated upon by some operator that can be constant
+// folded with a constant 0 or 1, and that constant can be materialized using
+// only one instruction (like a zero or one), then we should fold in those
+// operations with the select.
+void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
+ if (!PPCSubTarget->useCRBits())
+ return;
+
+ if (N->getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOpcode() != ISD::ANY_EXTEND)
+ return;
+
+ if (N->getOperand(0).getValueType() != MVT::i1)
+ return;
+
+ if (!N->hasOneUse())
+ return;
+
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Cond = N->getOperand(0);
+ SDValue ConstTrue =
+ CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, VT);
+ SDValue ConstFalse = CurDAG->getConstant(0, VT);
+
+ do {
+ SDNode *User = *N->use_begin();
+ if (User->getNumOperands() != 2)
+ break;
+
+ auto TryFold = [this, N, User](SDValue Val) {
+ SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
+ SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
+ SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
+
+ return CurDAG->FoldConstantArithmetic(User->getOpcode(),
+ User->getValueType(0),
+ O0.getNode(), O1.getNode());
+ };
+
+ SDValue TrueRes = TryFold(ConstTrue);
+ if (!TrueRes)
+ break;
+ SDValue FalseRes = TryFold(ConstFalse);
+ if (!FalseRes)
+ break;
+
+ // For us to materialize these using one instruction, we must be able to
+ // represent them as signed 16-bit integers.
+ uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
+ False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
+ if (!isInt<16>(True) || !isInt<16>(False))
+ break;
+
+ // We can replace User with a new SELECT node, and try again to see if we
+ // can fold the select with its user.
+ Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
+ N = User;
+ ConstTrue = TrueRes;
+ ConstFalse = FalseRes;
+ } while (N->hasOneUse());
+}
+
void PPCDAGToDAGISel::PreprocessISelDAG() {
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
break;
}
+ if (!Res)
+ foldBoolExts(Res, N);
+
if (Res) {
DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
DEBUG(N->dump(CurDAG));
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
+ case PPC::SELECT_QFRC:
+ case PPC::SELECT_QSRC:
+ case PPC::SELECT_QBRC:
case PPC::SELECT_VRRC:
case PPC::SELECT_VSFRC:
case PPC::SELECT_VSRC: {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
+ case PPC::SELECT_QFRC:
+ case PPC::SELECT_QSRC:
+ case PPC::SELECT_QBRC:
case PPC::SELECT_VRRC:
case PPC::SELECT_VSFRC:
case PPC::SELECT_VSRC:
return true;
}
+ // LHBRX and LWBRX always clear the higher-order bits.
+ if (Op32.getMachineOpcode() == PPC::LHBRX ||
+ Op32.getMachineOpcode() == PPC::LWBRX) {
+ ToPromote.insert(Op32.getNode());
+ return true;
+ }
+
+ // CNTLZW always produces a 64-bit value in [0,32], and so is zero extended.
+ if (Op32.getMachineOpcode() == PPC::CNTLZW) {
+ ToPromote.insert(Op32.getNode());
+ return true;
+ }
+
// Next, check for those instructions we can look through.
// Assuming the mask does not wrap around, then the higher-order bits are
case PPC::SRW: NewOpcode = PPC::SRW8; break;
case PPC::LI: NewOpcode = PPC::LI8; break;
case PPC::LIS: NewOpcode = PPC::LIS8; break;
+ case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
+ case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
+ case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
case PPC::OR: NewOpcode = PPC::OR8; break;
case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;