/// this target.
bool isSetCCExpensive() const { return SetCCIsExpensive; }
- /// isIntDivExpensive() - Return true if integer divide is more expensive than
+ /// isIntDivCheap() - Return true if integer divide is usually cheaper than
/// a sequence of several shifts, adds, and multiplies for this target.
- bool isIntDivExpensive() const { return IntDivIsExpensive; }
+ bool isIntDivCheap() const { return IntDivIsCheap; }
+ /// isPow2DivCheap() - Return true if pow2 div is cheaper than a chain of
+ /// srl/add/sra.
+ bool isPow2DivCheap() const { return Pow2DivIsCheap; }
+
/// getSetCCResultTy - Return the ValueType of the result of setcc operations.
///
MVT::ValueType getSetCCResultTy() const { return SetCCResultTy; }
/// setcc operations into other operations if possible.
void setSetCCIsExpensive() { SetCCIsExpensive = true; }
- /// setIntDivIsExpensive - Tells the code generator that integer divide is
+ /// setIntDivIsCheap - Tells the code generator that integer divide is
/// expensive, and if possible, should be replaced by an alternate sequence
/// of instructions not containing an integer divide.
- void setIntDivIsExpensive() { IntDivIsExpensive = true; }
+ void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; }
+
+ /// setPow2DivIsCheap - Tells the code generator that it shouldn't generate
+ /// srl/add/sra for a signed divide by power of two, and let the target handle
+ /// it.
+ void setPow2DivIsCheap(bool isCheap = true) { Pow2DivIsCheap = isCheap; }
/// addRegisterClass - Add the specified register class as an available
/// regclass for the specified value type. This indicates the selector can
/// setcc operations into other operations if possible.
bool SetCCIsExpensive;
- /// IntDivIsExpensive - This is a hack until a real costs model is in place
- /// that tells the code generator whether integer divide will always be more
- /// expensive than a sequence of multiplies, shifts, and adds that performs
- /// the same operation. If we ever optimize for size, this will be set to
- /// false unconditionally.
- bool IntDivIsExpensive;
+ /// IntDivIsCheap - Tells the code generator not to expand integer divides by
+ /// constants into a sequence of muls, adds, and shifts. This is a hack until
+ /// a real cost model is in place. If we ever optimize for size, this will be
+ /// set to true unconditionally.
+ bool IntDivIsCheap;
+
+ /// Pow2DivIsCheap - Tells the code generator that it shouldn't generate
+ /// srl/add/sra for a signed divide by power of two, and let the target handle
+ /// it.
+ bool Pow2DivIsCheap;
/// SetCCResultTy - The type that SetCC operations use. This defaults to the
/// PointerTy.
return N1;
// fold (mul x, -1) -> 0-x
if (N1C && N1C->isAllOnesValue())
- return DAG.getNode(ISD::SUB, N->getValueType(0),
- DAG.getConstant(0, N->getValueType(0)), N0);
+ return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
// fold (mul x, (1 << c)) -> x << c
if (N1C && isPowerOf2_64(N1C->getValue()))
return DAG.getNode(ISD::SHL, N->getValueType(0), N0,
if (N0C && N1C && !N1C->isNullValue())
return DAG.getConstant(N0C->getSignExtended() / N1C->getSignExtended(),
N->getValueType(0));
+ // fold (sdiv X, 1) -> X
+ if (N1C && N1C->getSignExtended() == 1LL)
+ return N0;
+ // fold (sdiv X, -1) -> 0-X
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1);
if (MaskedValueIsZero(N1, SignBit, TLI) &&
MaskedValueIsZero(N0, SignBit, TLI))
return DAG.getNode(ISD::UDIV, N1.getValueType(), N0, N1);
+ // fold (sdiv X, pow2) -> (add (sra X, log(pow2)), (srl X, sizeof(X)-1))
+ if (N1C && N1C->getValue() && !TLI.isIntDivCheap() &&
+ (isPowerOf2_64(N1C->getSignExtended()) ||
+ isPowerOf2_64(-N1C->getSignExtended()))) {
+ // If dividing by powers of two is cheap, then don't perform the following
+ // fold.
+ if (TLI.isPow2DivCheap())
+ return SDOperand();
+ int64_t pow2 = N1C->getSignExtended();
+ int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
+ SDOperand SRL = DAG.getNode(ISD::SRL, VT, N0,
+ DAG.getConstant(MVT::getSizeInBits(VT)-1,
+ TLI.getShiftAmountTy()));
+ WorkList.push_back(SRL.Val);
+ SDOperand SGN = DAG.getNode(ISD::ADD, VT, N0, SRL);
+ WorkList.push_back(SGN.Val);
+ SDOperand SRA = DAG.getNode(ISD::SRA, VT, SGN,
+ DAG.getConstant(Log2_64(abs2),
+ TLI.getShiftAmountTy()));
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (pow2 > 0)
+ return SRA;
+ WorkList.push_back(SRA.Val);
+ return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), SRA);
+ }
// if integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence.
- // FIXME: This currently opts out powers of two, since targets can often be
- // more clever in those cases. In an idea world, we would have some way to
- // detect that too.
- if (N1C && !isPowerOf2_64(N1C->getSignExtended()) &&
- (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) &&
- TLI.isOperationLegal(ISD::MULHS, VT) && TLI.isTypeLegal(VT) &&
- TLI.isIntDivExpensive()) {
+ if (N1C && (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) &&
+ !TLI.isIntDivCheap() &&
+ TLI.isOperationLegal(ISD::MULHS, VT) && TLI.isTypeLegal(VT)) {
return BuildSDIV(N);
}
return SDOperand();
TLI.getShiftAmountTy()));
// fold (udiv x, c) -> alternate
if (N1C && N1C->getValue() && TLI.isOperationLegal(ISD::MULHU, VT) &&
- TLI.isTypeLegal(VT) && TLI.isIntDivExpensive())
+ TLI.isTypeLegal(VT) && !TLI.isIntDivCheap())
return BuildUDIV(N);
return SDOperand();
}
maxStoresPerMemSet = maxStoresPerMemCpy = maxStoresPerMemMove = 8;
allowUnalignedMemoryAccesses = false;
UseUnderscoreSetJmpLongJmp = false;
+ IntDivIsCheap = false;
+ Pow2DivIsCheap = false;
}
TargetLowering::~TargetLowering() {}
return SDOperand(N, 0);
}
case ISD::SDIV: {
+ // FIXME: since this depends on the setting of the carry flag from the srawi
+ // we should really be making notes about that for the scheduler.
+ // FIXME: It sure would be nice if we could cheaply recognize the
+ // srl/add/sra pattern the dag combiner will generate for this as
+ // sra/addze rather than having to handle sdiv ourselves. oh well.
unsigned Imm;
if (isIntImmediate(N->getOperand(1), Imm)) {
if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
// Fold away setcc operations if possible.
setSetCCIsExpensive();
- // Fold constant integer div/rem into an alternate sequence of instructions
- setIntDivIsExpensive();
+ setPow2DivIsCheap();
// Use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmpLongJmp(true);
return Result;
}
}
-
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- // FIXME: These special cases should be handled by the lowering impl!
- unsigned RHS = CN->getValue();
- bool isNeg = false;
- if ((int)RHS < 0) {
- isNeg = true;
- RHS = -RHS;
- }
- if (RHS && (RHS & (RHS-1)) == 0) { // Signed division by power of 2?
- unsigned Log = Log2_32(RHS);
- unsigned SAROpc, SHROpc, ADDOpc, NEGOpc;
- switch (N.getValueType()) {
- default: assert("Unknown type to signed divide!");
- case MVT::i8:
- SAROpc = X86::SAR8ri;
- SHROpc = X86::SHR8ri;
- ADDOpc = X86::ADD8rr;
- NEGOpc = X86::NEG8r;
- break;
- case MVT::i16:
- SAROpc = X86::SAR16ri;
- SHROpc = X86::SHR16ri;
- ADDOpc = X86::ADD16rr;
- NEGOpc = X86::NEG16r;
- break;
- case MVT::i32:
- SAROpc = X86::SAR32ri;
- SHROpc = X86::SHR32ri;
- ADDOpc = X86::ADD32rr;
- NEGOpc = X86::NEG32r;
- break;
- }
- unsigned RegSize = MVT::getSizeInBits(N.getValueType());
- Tmp1 = SelectExpr(N.getOperand(0));
- unsigned TmpReg;
- if (Log != 1) {
- TmpReg = MakeReg(N.getValueType());
- BuildMI(BB, SAROpc, 2, TmpReg).addReg(Tmp1).addImm(Log-1);
- } else {
- TmpReg = Tmp1;
- }
- unsigned TmpReg2 = MakeReg(N.getValueType());
- BuildMI(BB, SHROpc, 2, TmpReg2).addReg(TmpReg).addImm(RegSize-Log);
- unsigned TmpReg3 = MakeReg(N.getValueType());
- BuildMI(BB, ADDOpc, 2, TmpReg3).addReg(Tmp1).addReg(TmpReg2);
-
- unsigned TmpReg4 = isNeg ? MakeReg(N.getValueType()) : Result;
- BuildMI(BB, SAROpc, 2, TmpReg4).addReg(TmpReg3).addImm(Log);
- if (isNeg)
- BuildMI(BB, NEGOpc, 1, Result).addReg(TmpReg4);
- return Result;
- }
- }
}
if (getRegPressure(N.getOperand(0)) > getRegPressure(N.getOperand(1))) {