void deleteAndRecombine(SDNode *N);
bool recursivelyDeleteUnusedNodes(SDNode *N);
+ /// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo = true);
+ /// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
return CombineTo(N, &Res, 1, AddTo);
}
+ /// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
bool AddTo = true) {
SDValue To[] = { Res0, Res1 };
SDValue visitADDE(SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitMUL(SDNode *N);
+ SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitUDIV(SDNode *N);
- SDValue visitSREM(SDNode *N);
- SDValue visitUREM(SDNode *N);
+ SDValue visitREM(SDNode *N);
SDValue visitMULHU(SDNode *N);
SDValue visitMULHS(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
SDValue visitSMULO(SDNode *N);
SDValue visitUMULO(SDNode *N);
- SDValue visitSDIVREM(SDNode *N);
- SDValue visitUDIVREM(SDNode *N);
SDValue visitIMINMAX(SDNode *N);
SDValue visitAND(SDNode *N);
SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitVSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
+ SDValue visitSETCCE(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
unsigned SequenceNum;
};
+ /// This is a helper function for visitMUL to check the profitability
+ /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+ /// MulNode is the original multiply, AddNode is (add x, c1),
+ /// and ConstNode is c2.
+ bool isMulAddWithConstProfitable(SDNode *MulNode,
+ SDValue &AddNode,
+ SDValue &ConstNode);
+
/// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
/// constant build_vector of the stored constant values in Stores.
SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Chains,
EVT Ty) const;
+ /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
+ /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
+ /// the type of the loaded value to be extended. LoadedVT returns the type
+ /// of the original loaded value. NarrowLoad returns whether the load would
+ /// need to be narrowed in order to match.
+ bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+ EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
+ bool &NarrowLoad);
+
/// This is a helper function for MergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
APInt KnownZero, KnownOne;
+
+  // XXX-disabled: do not let SimplifyDemandedBits fold away (and X, 0)
+  // patterns (even behind an OR or TRUNCATE); bail out early instead.
+ auto Opcode = Op.getOpcode();
+ if (Opcode == ISD::AND || Opcode == ISD::OR) {
+ auto* Op1 = Op.getOperand(0).getNode();
+ auto* Op2 = Op.getOperand(1).getNode();
+ auto* Op1C = dyn_cast<ConstantSDNode>(Op1);
+ auto* Op2C = dyn_cast<ConstantSDNode>(Op2);
+
+ // and X, 0
+ if (Opcode == ISD::AND && !Op1C && Op2C && Op2C->isNullValue()) {
+ return false;
+ }
+
+ // or (and X, 0), Y
+ if (Opcode == ISD::OR) {
+ if (Op1->getOpcode() == ISD::AND) {
+ auto* Op11 = Op1->getOperand(0).getNode();
+ auto* Op12 = Op1->getOperand(1).getNode();
+ auto* Op11C = dyn_cast<ConstantSDNode>(Op11);
+ auto* Op12C = dyn_cast<ConstantSDNode>(Op12);
+ if (!Op11C && Op12C && Op12C->isNullValue()) {
+ return false;
+ }
+ }
+ if (Op1->getOpcode() == ISD::TRUNCATE) {
+ // or (trunc (and %0, 0)), Y
+ auto* Op11 = Op1->getOperand(0).getNode();
+ if (Op11->getOpcode() == ISD::AND) {
+ auto* Op111 = Op11->getOperand(0).getNode();
+ auto* Op112 = Op11->getOperand(1).getNode();
+ auto* Op111C = dyn_cast<ConstantSDNode>(Op111);
+ auto* Op112C = dyn_cast<ConstantSDNode>(Op112);
+ if (!Op111C && Op112C && Op112C->isNullValue()) {
+            // or (trunc (and X, 0)), Y
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ // trunc (and X, 0)
+ if (Opcode == ISD::TRUNCATE) {
+ auto* Op1 = Op.getOperand(0).getNode();
+ if (Op1->getOpcode() == ISD::AND) {
+ auto* Op11 = Op1->getOperand(0).getNode();
+ auto* Op12 = Op1->getOperand(1).getNode();
+ auto* Op11C = dyn_cast<ConstantSDNode>(Op11);
+ auto* Op12C = dyn_cast<ConstantSDNode>(Op12);
+ if (!Op11C && Op12C && Op12C->isNullValue()) {
+ return false;
+ }
+ }
+ }
+
if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
return false;
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
- case ISD::SREM: return visitSREM(N);
- case ISD::UREM: return visitUREM(N);
+ case ISD::SREM:
+ case ISD::UREM: return visitREM(N);
case ISD::MULHU: return visitMULHU(N);
case ISD::MULHS: return visitMULHS(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
case ISD::SMULO: return visitSMULO(N);
case ISD::UMULO: return visitUMULO(N);
- case ISD::SDIVREM: return visitSDIVREM(N);
- case ISD::UDIVREM: return visitUDIVREM(N);
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::VSELECT: return visitVSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
+ case ISD::SETCCE: return visitSETCCE(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
-static bool isNullConstant(SDValue V) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isNullValue();
-}
-
-static bool isNullFPConstant(SDValue V) {
- ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
- return Const != nullptr && Const->isZero() && !Const->isNegative();
-}
-
-static bool isAllOnesConstant(SDValue V) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isAllOnesValue();
-}
-
-static bool isOneConstant(SDValue V) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isOne();
-}
-
/// If \p N is a ConstantSDNode with isOpaque() == false return it cast to a
/// ConstantSDNode pointer else nullptr.
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
return SDValue(N, 0);
// fold (a+b) -> (a|b) iff a and b share no bits.
- if (VT.isInteger() && !VT.isVector()) {
- APInt LHSZero, LHSOne;
- APInt RHSZero, RHSOne;
- DAG.computeKnownBits(N0, LHSZero, LHSOne);
-
- if (LHSZero.getBoolValue()) {
- DAG.computeKnownBits(N1, RHSZero, RHSOne);
-
- // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
- // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
- if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
- }
- }
- }
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
+ VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// If the flag result is dead, turn this into an SUB.
if (!N->hasAnyUseOfValue(1))
- return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
- DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
- MVT::Glue));
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// fold (subc x, x) -> 0 + no borrow
- if (N0 == N1) {
- SDLoc DL(N);
+ if (N0 == N1)
return CombineTo(N, DAG.getConstant(0, DL, VT),
- DAG.getNode(ISD::CARRY_FALSE, DL,
- MVT::Glue));
- }
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// fold (subc x, 0) -> x + no borrow
if (isNullConstant(N1))
- return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
- MVT::Glue));
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
if (isAllOnesConstant(N0))
- return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
- DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
- MVT::Glue));
+ return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
return SDValue();
}
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
- if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
- (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
- isa<ConstantSDNode>(N0.getOperand(1))))
- return DAG.getNode(ISD::ADD, SDLoc(N), VT,
- DAG.getNode(ISD::MUL, SDLoc(N0), VT,
- N0.getOperand(0), N1),
- DAG.getNode(ISD::MUL, SDLoc(N1), VT,
- N0.getOperand(1), N1));
+ if (isConstantIntBuildVectorOrConstantInt(N1) &&
+ N0.getOpcode() == ISD::ADD &&
+ isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
+ isMulAddWithConstProfitable(N, N0, N1))
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT,
+ DAG.getNode(ISD::MUL, SDLoc(N0), VT,
+ N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, SDLoc(N1), VT,
+ N0.getOperand(1), N1));
// reassociate mul
if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
return SDValue();
}
+/// Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+ const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: return false; // No libcall for vector types.
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ return TLI.getLibcallName(LC) != nullptr;
+}
+
+/// Issue divrem if both quotient and remainder are needed.
+SDValue DAGCombiner::useDivRem(SDNode *Node) {
+ if (Node->use_empty())
+ return SDValue(); // This is a dead node, leave it alone.
+
+ EVT VT = Node->getValueType(0);
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
+
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ // If DIVREM is going to get expanded into a libcall,
+ // but there is no libcall available, then don't combine.
+ if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
+ !isDivRemLibcallAvailable(Node, isSigned, TLI))
+ return SDValue();
+
+ // If div is legal, it's better to do the normal expansion
+ unsigned OtherOpcode = 0;
+ if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
+ OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
+ if (TLI.isOperationLegalOrCustom(Opcode, VT))
+ return SDValue();
+ } else {
+ OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
+ return SDValue();
+ }
+
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue combined;
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node || User->use_empty())
+ continue;
+ // Convert the other matching node(s), too;
+ // otherwise, the DIVREM may get target-legalized into something
+ // target-specific that we won't be able to recognize.
+ unsigned UserOpc = User->getOpcode();
+ if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
+ User->getOperand(0) == Op0 &&
+ User->getOperand(1) == Op1) {
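+      // Lazily create (or reuse) a single DIVREM node on the first match;
+      // every later matching user is combined into that same node.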
+ if (!combined) {
+ if (UserOpc == OtherOpcode) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
+ } else if (UserOpc == DivRemOpc) {
+ combined = SDValue(User, 0);
+ } else {
+ assert(UserOpc == Opcode);
+ continue;
+ }
+ }
+ if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
+ CombineTo(User, combined);
+ else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
+ CombineTo(User, combined.getValue(1));
+ }
+ }
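+  // If a DIVREM was formed, return its quotient (value 0); visitREM takes
+  // getValue(1) of the returned node to pick up the remainder.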
+ return combined;
+}
+
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
+ SDLoc DL(N);
+
// fold (sdiv c1, c2) -> c1/c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
- return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
// fold (sdiv X, 1) -> X
if (N1C && N1C->isOne())
return N0;
// fold (sdiv X, -1) -> 0-X
- if (N1C && N1C->isAllOnesValue()) {
- SDLoc DL(N);
+ if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), N0);
- }
+
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
if (!VT.isVector()) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
- N0, N1);
+ return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
}
// fold (sdiv X, pow2) -> simple ops after legalize
return Res;
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
- SDLoc DL(N);
// Splat the sign bit into the register
SDValue SGN =
if (SDValue Op = BuildSDIV(N))
return Op;
+ // sdiv, srem -> sdivrem
+  // If the divisor is constant, then return DIVREM only if isIntDivCheap()
+  // is true. Otherwise, we break the simplification logic in visitREM().
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
+
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
+ SDLoc DL(N);
+
// fold (udiv c1, c2) -> c1/c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT,
+ if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
N0C, N1C))
return Folded;
// fold (udiv x, (1 << c)) -> x >>u c
- if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) {
- SDLoc DL(N);
+ if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
return DAG.getNode(ISD::SRL, DL, VT, N0,
DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
getShiftAmountTy(N0.getValueType())));
- }
+
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
EVT ADDVT = N1.getOperand(1).getValueType();
- SDLoc DL(N);
SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
N1.getOperand(1),
DAG.getConstant(SHC->getAPIntValue()
if (SDValue Op = BuildUDIV(N))
return Op;
+  // udiv, urem -> udivrem
+  // If the divisor is constant, then return DIVREM only if isIntDivCheap()
+  // is true. Otherwise, we break the simplification logic in visitREM().
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
+
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
return SDValue();
}
-SDValue DAGCombiner::visitSREM(SDNode *N) {
+// handles ISD::SREM and ISD::UREM
+SDValue DAGCombiner::visitREM(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ bool isSigned = (Opcode == ISD::SREM);
+ SDLoc DL(N);
- // fold (srem c1, c2) -> c1%c2
+ // fold (rem c1, c2) -> c1%c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT,
- N0C, N1C))
+ if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
return Folded;
- // If we know the sign bits of both operands are zero, strength reduce to a
- // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
- if (!VT.isVector()) {
- if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
- }
- // If X/C can be simplified by the division-by-constant logic, lower
- // X%C to the equivalent of X-X/C*C.
- if (N1C && !N1C->isNullValue()) {
- SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
- AddToWorklist(Div.getNode());
- SDValue OptimizedDiv = combine(Div.getNode());
- if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
- SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
- OptimizedDiv, N1);
- SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
- AddToWorklist(Mul.getNode());
- return Sub;
+ if (isSigned) {
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
}
- }
-
- // undef % X -> 0
- if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
- // X % undef -> undef
- if (N1.getOpcode() == ISD::UNDEF)
- return N1;
-
- return SDValue();
-}
-
-SDValue DAGCombiner::visitUREM(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- EVT VT = N->getValueType(0);
-
- // fold (urem c1, c2) -> c1%c2
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C)
- if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT,
- N0C, N1C))
- return Folded;
- // fold (urem x, pow2) -> (and x, pow2-1)
- if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
- N1C->getAPIntValue().isPowerOf2()) {
- SDLoc DL(N);
- return DAG.getNode(ISD::AND, DL, VT, N0,
- DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
- }
- // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
- if (N1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
- if (SHC->getAPIntValue().isPowerOf2()) {
- SDLoc DL(N);
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1,
+ } else {
+ // fold (urem x, pow2) -> (and x, pow2-1)
+ if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
+ N1C->getAPIntValue().isPowerOf2()) {
+ return DAG.getNode(ISD::AND, DL, VT, N0,
+ DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
+ }
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, VT, N1,
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
VT));
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::AND, DL, VT, N0, Add);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
+ }
}
}
}
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
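+  // The function attributes are passed to TLI.isIntDivCheap() below (targets
+  // typically check for minsize here).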
+
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
- if (N1C && !N1C->isNullValue()) {
- SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
+ // To avoid mangling nodes, this simplification requires that the combine()
+ // call for the speculative DIV must not cause a DIVREM conversion. We guard
+ // against this by skipping the simplification if isIntDivCheap(). When
+ // div is not cheap, combine will not return a DIVREM. Regardless,
+ // checking cheapness here makes sense since the simplification results in
+ // fatter code.
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
+ unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
AddToWorklist(Div.getNode());
SDValue OptimizedDiv = combine(Div.getNode());
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
- SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
- OptimizedDiv, N1);
- SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
+ assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
+ (OptimizedDiv.getOpcode() != ISD::SDIVREM));
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
AddToWorklist(Mul.getNode());
return Sub;
}
}
+  // srem -> sdivrem / urem -> udivrem; use the remainder (value 1) of the node.
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem.getValue(1);
+
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
return SDValue();
}
-SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
- if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM))
- return Res;
-
- return SDValue();
-}
-
-SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
- if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM))
- return Res;
-
- return SDValue();
-}
-
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
return SDValue();
}
+bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+ EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
+ bool &NarrowLoad) {
+ uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
+
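+  // The AND constant must be a contiguous mask of low bits (e.g. 0xFF) for
+  // the (and (load x) c) pattern to describe a zero-extending load.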
+ if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
+ return false;
+
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ LoadedVT = LoadN->getMemoryVT();
+
+ if (ExtVT == LoadedVT &&
+ (!LegalOperations ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
+ // ZEXTLOAD will match without needing to change the size of the value being
+ // loaded.
+ NarrowLoad = false;
+ return true;
+ }
+
+ // Do not change the width of a volatile load.
+ if (LoadN->isVolatile())
+ return false;
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
+ return false;
+
+ if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
+ return false;
+
+ NarrowLoad = true;
+ return true;
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// fold (and c1, c2) -> c1&c2
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+
+ // XXX-disabled: (and x, 0) should not be folded.
+  if (!N0C && N1C && N1C->isNullValue()) {
+ return SDValue();
+ }
+
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
: cast<LoadSDNode>(N0);
if (LN0->getExtensionType() != ISD::SEXTLOAD &&
LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
- uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
- if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
- EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
- EVT LoadedVT = LN0->getMemoryVT();
- EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
-
- if (ExtVT == LoadedVT &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
- ExtVT))) {
-
+ auto NarrowLoad = false;
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+ EVT ExtVT, LoadedVT;
+ if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
+ NarrowLoad)) {
+ if (!NarrowLoad) {
SDValue NewLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
LN0->getChain(), LN0->getBasePtr(), ExtVT,
AddToWorklist(N);
CombineTo(LN0, NewLoad, NewLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
-
- // Do not change the width of a volatile load.
- // Do not generate loads of non-round integer types since these can
- // be expensive (and would be wrong if the type is not byte sized).
- if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
- (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
- ExtVT))) {
+ } else {
EVT PtrType = LN0->getOperand(1).getValueType();
unsigned Alignment = LN0->getAlignment();
/// Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
if (Op.getOpcode() == ISD::AND) {
- if (isa<ConstantSDNode>(Op.getOperand(1))) {
+ if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
Mask = Op.getOperand(1);
Op = Op.getOperand(0);
} else {
}
// Return true if we can prove that, whenever Neg and Pos are both in the
-// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that
+// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
// (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
-// in direction shift1 by Neg. The range [0, OpSize) means that we only need
+// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
-static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
- // If OpSize is a power of 2 then:
+static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
+ // If EltSize is a power of 2 then:
//
- // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1)
- // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize).
+ // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
+ // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
//
- // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check
+ // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
// for the stronger condition:
//
- // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A]
+ // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
//
- // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1)
+ // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
// we can just replace Neg with Neg' for the rest of the function.
//
// In other cases we check for the even stronger condition:
//
- // Neg == OpSize - Pos [B]
+ // Neg == EltSize - Pos [B]
//
// for all Neg and Pos. Note that the (or ...) then invokes undefined
- // behavior if Pos == 0 (and consequently Neg == OpSize).
+ // behavior if Pos == 0 (and consequently Neg == EltSize).
//
- // We could actually use [A] whenever OpSize is a power of 2, but the
+ // We could actually use [A] whenever EltSize is a power of 2, but the
// only extra cases that it would match are those uninteresting ones
// where Neg and Pos are never in range at the same time. E.g. for
- // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
+ // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
// as well as (sub 32, Pos), but:
//
// (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
//
// always invokes undefined behavior for 32-bit X.
//
- // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise.
+ // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
unsigned MaskLoBits = 0;
- if (Neg.getOpcode() == ISD::AND &&
- isPowerOf2_64(OpSize) &&
- Neg.getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) {
- Neg = Neg.getOperand(0);
- MaskLoBits = Log2_64(OpSize);
+ if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
+ if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
+ if (NegC->getAPIntValue() == EltSize - 1) {
+ Neg = Neg.getOperand(0);
+ MaskLoBits = Log2_64(EltSize);
+ }
+ }
}
// Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
if (Neg.getOpcode() != ISD::SUB)
- return 0;
- ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0));
+ return false;
+ ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
if (!NegC)
- return 0;
+ return false;
SDValue NegOp1 = Neg.getOperand(1);
- // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with
+ // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
// Pos'. The truncation is redundant for the purpose of the equality.
- if (MaskLoBits &&
- Pos.getOpcode() == ISD::AND &&
- Pos.getOperand(1).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1)
- Pos = Pos.getOperand(0);
+ if (MaskLoBits && Pos.getOpcode() == ISD::AND)
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
+ if (PosC->getAPIntValue() == EltSize - 1)
+ Pos = Pos.getOperand(0);
// The condition we need is now:
//
- // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask
+ // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
//
// If NegOp1 == Pos then we need:
//
- // OpSize & Mask == NegC & Mask
+ // EltSize & Mask == NegC & Mask
//
// (because "x & Mask" is a truncation and distributes through subtraction).
APInt Width;
if (Pos == NegOp1)
Width = NegC->getAPIntValue();
+
// Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
// Then the condition we want to prove becomes:
//
- // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask
+ // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
//
// which, again because "x & Mask" is a truncation, becomes:
//
- // NegC & Mask == (OpSize - PosC) & Mask
- // OpSize & Mask == (NegC + PosC) & Mask
- else if (Pos.getOpcode() == ISD::ADD &&
- Pos.getOperand(0) == NegOp1 &&
- Pos.getOperand(1).getOpcode() == ISD::Constant)
- Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() +
- NegC->getAPIntValue());
- else
+ // NegC & Mask == (EltSize - PosC) & Mask
+ // EltSize & Mask == (NegC + PosC) & Mask
+ else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
+ Width = PosC->getAPIntValue() + NegC->getAPIntValue();
+ else
+ return false;
+ } else
return false;
- // Now we just need to check that OpSize & Mask == Width & Mask.
+ // Now we just need to check that EltSize & Mask == Width & Mask.
if (MaskLoBits)
- // Opsize & Mask is 0 since Mask is Opsize - 1.
+ // EltSize & Mask is 0 since Mask is EltSize - 1.
return Width.getLoBits(MaskLoBits) == 0;
- return Width == OpSize;
+ return Width == EltSize;
}
// A subroutine of MatchRotate used once we have found an OR of two opposite
// (srl x, (*ext y))) ->
// (rotr x, y) or (rotl x, (sub 32, y))
EVT VT = Shifted.getValueType();
- if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) {
+ if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
HasPos ? Pos : Neg).getNode();
if (RHSShift.getOpcode() == ISD::SHL) {
std::swap(LHS, RHS);
std::swap(LHSShift, RHSShift);
- std::swap(LHSMask , RHSMask );
+ std::swap(LHSMask, RHSMask);
}
- unsigned OpSizeInBits = VT.getSizeInBits();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
SDValue LHSShiftArg = LHSShift.getOperand(0);
SDValue LHSShiftAmt = LHSShift.getOperand(1);
SDValue RHSShiftArg = RHSShift.getOperand(0);
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
- if (LHSShiftAmt.getOpcode() == ISD::Constant &&
- RHSShiftAmt.getOpcode() == ISD::Constant) {
- uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
- uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
- if ((LShVal + RShVal) != OpSizeInBits)
+ if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
+ uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
+ uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
+ if ((LShVal + RShVal) != EltSizeInBits)
return nullptr;
SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
- APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
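+      // Build the combined mask as DAG nodes so splat vector masks are
+      // handled as well as scalar ones.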
+ APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
+ SDValue Mask = DAG.getConstant(AllBits, DL, VT);
if (LHSMask.getNode()) {
- APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
- Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
+ APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
+ Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+ DAG.getNode(ISD::OR, DL, VT, LHSMask,
+ DAG.getConstant(RHSBits, DL, VT)));
}
if (RHSMask.getNode()) {
- APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
- Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
+ APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
+ Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+ DAG.getNode(ISD::OR, DL, VT, RHSMask,
+ DAG.getConstant(LHSBits, DL, VT)));
}
- Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT));
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
}
return Rot.getNode();
SDLoc(N));
}
+SDValue DAGCombiner::visitSETCCE(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+
+ // If Carry is false, fold to a regular SETCC.
+ if (Carry.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
+
+ return SDValue();
+}
+
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
// fold (zext (and/or/xor (load x), cst)) ->
// (and/or/xor (zextload x), (zext cst))
+ // Unless (and (load x) cst) will match as a zextload already and has
+ // additional users.
if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
N0.getOpcode() == ISD::XOR) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
- if (!N0.hasOneUse())
- DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
- SetCCs, TLI);
+ if (!N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::AND) {
+ auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
+ auto NarrowLoad = false;
+ EVT LoadResultTy = AndC->getValueType(0);
+ EVT ExtVT, LoadedVT;
+ if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
+ NarrowLoad))
+ DoXform = false;
+ }
+ if (DoXform)
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
+ ISD::ZERO_EXTEND, SetCCs, TLI);
+ }
if (DoXform) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
LN0->getChain(), LN0->getBasePtr(),
uint64_t PtrOff = ShAmt / 8;
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
SDLoc DL(LN0);
+ // The original load itself didn't wrap, so an offset within it doesn't.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
PtrType, LN0->getBasePtr(),
- DAG.getConstant(PtrOff, DL, PtrType));
+ DAG.getConstant(PtrOff, DL, PtrType),
+ &Flags);
AddToWorklist(NewPtr.getNode());
SDValue Load;
unsigned VTBits = VT.getScalarType().getSizeInBits();
unsigned EVTBits = EVT.getScalarType().getSizeInBits();
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
// fold (sext_in_reg c1) -> c1
- if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
BSwap, N1);
}
- // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
- // into a build_vector.
- if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
- SmallVector<SDValue, 8> Elts;
- unsigned NumElts = N0->getNumOperands();
- unsigned ShAmt = VTBits - EVTBits;
-
- for (unsigned i = 0; i != NumElts; ++i) {
- SDValue Op = N0->getOperand(i);
- if (Op->getOpcode() == ISD::UNDEF) {
- Elts.push_back(Op);
- continue;
- }
-
- ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
- const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
- Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
- SDLoc(Op), Op.getValueType()));
- }
-
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
- }
-
return SDValue();
}
return SDValue();
}
+static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
+ // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
+ // and Lo parts; on big-endian machines it doesn't.
+ return DAG.getDataLayout().isBigEndian() ? 1 : 0;
+}
+
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ //
+ // For ppc_fp128:
+ // fold (bitcast (fneg x)) ->
+ // flipbit = signbit
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
+ //
+ // fold (bitcast (fabs x)) ->
+ // flipbit = (and (extract_element (bitcast x), 0), signbit)
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
// This often reduces constant pool loads.
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
(N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
AddToWorklist(NewConv.getNode());
SDLoc DL(N);
+ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
+ assert(VT.getSizeInBits() == 128);
+ SDValue SignBit = DAG.getConstant(
+ APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
+ SDValue FlipBit;
+ if (N0.getOpcode() == ISD::FNEG) {
+ FlipBit = SignBit;
+ AddToWorklist(FlipBit.getNode());
+ } else {
+ assert(N0.getOpcode() == ISD::FABS);
+ SDValue Hi =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
+ DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
+ SDLoc(NewConv)));
+ AddToWorklist(Hi.getNode());
+ FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
+ AddToWorklist(FlipBit.getNode());
+ }
+ SDValue FlipBits =
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+ AddToWorklist(FlipBits.getNode());
+ return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
+ }
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
return DAG.getNode(ISD::XOR, DL, VT,
// (or (and (bitconvert x), sign), (and cst, (not sign)))
// Note that we don't handle (copysign x, cst) because this can always be
// folded to an fneg or fabs.
+ //
+ // For ppc_fp128:
+ // fold (bitcast (fcopysign cst, x)) ->
+ // flipbit = (and (extract_element
+ // (xor (bitcast cst), (bitcast x)), 0),
+ // signbit)
+ // (xor (bitcast cst) (build_pair flipbit, flipbit))
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(0)) &&
VT.isInteger() && !VT.isVector()) {
AddToWorklist(X.getNode());
}
+ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
+ SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT,
+ N0.getOperand(0));
+ AddToWorklist(Cst.getNode());
+ SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT,
+ N0.getOperand(1));
+ AddToWorklist(X.getNode());
+ SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
+ AddToWorklist(XorResult.getNode());
+ SDValue XorResult64 = DAG.getNode(
+ ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
+ DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
+ SDLoc(XorResult)));
+ AddToWorklist(XorResult64.getNode());
+ SDValue FlipBit =
+ DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
+ DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
+ AddToWorklist(FlipBit.getNode());
+ SDValue FlipBits =
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+ AddToWorklist(FlipBits.getNode());
+ return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
+ }
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
X, DAG.getConstant(SignBit, SDLoc(X), VT));
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
bool AllowNewConst = (Level < AfterLegalizeDAG);
// fold (fadd A, 0) -> A
- if (N1CFP && N1CFP->isZero())
- return N0;
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
+ if (N1C->isZero())
+ return N0;
// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
- isa<ConstantFPSDNode>(N0.getOperand(1)))
+ isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
Flags),
// of rounding steps.
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {
- ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
- ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+ bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
DAG.getConstantFP(1.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
}
if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
DAG.getConstantFP(2.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
}
}
if (N1.getOpcode() == ISD::FMUL) {
- ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
- ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
+ bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
DAG.getConstantFP(1.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
}
if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
DAG.getConstantFP(2.0, DL, VT), Flags);
return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
}
}
if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
- ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+ bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
- if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
+ if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
(N0.getOperand(0) == N1)) {
return DAG.getNode(ISD::FMUL, DL, VT,
N1, DAG.getConstantFP(3.0, DL, VT), Flags);
}
if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
- ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+ bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0) {
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
// Canonicalize (fma c, x, y) -> (fma x, c, y)
- if (N0CFP && !N1CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
// TODO: FMA nodes should have flags that propagate to the created nodes.
SDNodeFlags Flags;
Flags.setUnsafeAlgebra(true);
- // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
- if (Options.UnsafeFPMath && N1CFP &&
- N2.getOpcode() == ISD::FMUL &&
- N0 == N2.getOperand(0) &&
- N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1),
- &Flags), &Flags);
- }
-
+ if (Options.UnsafeFPMath) {
+ // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
+ if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
+ isConstantFPBuildVectorOrConstantFP(N1) &&
+ isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
+ return DAG.getNode(ISD::FMUL, dl, VT, N0,
+ DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1),
+ &Flags), &Flags);
+ }
- // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
- if (Options.UnsafeFPMath &&
- N0.getOpcode() == ISD::FMUL && N1CFP &&
- N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
- return DAG.getNode(ISD::FMA, dl, VT,
- N0.getOperand(0),
- DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1),
- &Flags),
- N2);
+ // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
+ if (N0.getOpcode() == ISD::FMUL &&
+ isConstantFPBuildVectorOrConstantFP(N1) &&
+ isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1),
+ &Flags),
+ N2);
+ }
}
// (fma x, 1, y) -> (fadd x, y)
}
}
- // (fma x, c, x) -> (fmul x, (c+1))
- if (Options.UnsafeFPMath && N1CFP && N0 == N2) {
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(1.0, dl, VT),
- &Flags), &Flags);
- }
- // (fma x, c, (fneg x)) -> (fmul x, (c-1))
- if (Options.UnsafeFPMath && N1CFP &&
- N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
+ if (Options.UnsafeFPMath) {
+ // (fma x, c, x) -> (fmul x, (c+1))
+ if (N1CFP && N0 == N2) {
return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(-1.0, dl, VT),
- &Flags), &Flags);
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(1.0, dl, VT),
+ &Flags), &Flags);
+ }
+
+ // (fma x, c, (fneg x)) -> (fmul x, (c-1))
+ if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, dl, VT, N0,
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(-1.0, dl, VT),
+ &Flags), &Flags);
+ }
}
return SDValue();
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
- if (!DAG.getTarget().Options.UnsafeFPMath)
+ bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
+ const SDNodeFlags *Flags = N->getFlags();
+ if (!UnsafeMath && !Flags->hasAllowReciprocal())
return SDValue();
// Skip if current node is a reciprocal.
// Find all FDIV users of the same divisor.
// Use a set because duplicates may be present in the user list.
SetVector<SDNode *> Users;
- for (auto *U : N1->uses())
- if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1)
- Users.insert(U);
+ for (auto *U : N1->uses()) {
+ if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
+ // This division is eligible for optimization only if global unsafe math
+ // is enabled or if this division allows reciprocal formation.
+ if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
+ Users.insert(U);
+ }
+ }
// Now that we have the actual number of divisor uses, make sure it meets
// the minimum threshold specified by the target.
EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
- const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
// Dividend / Divisor -> Dividend * Reciprocal
ZeroCmp, Zero, RV);
}
+/// copysign(x, fp_extend(y)) -> copysign(x, y)
+/// copysign(x, fp_round(y)) -> copysign(x, y)
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
+ SDValue N1 = N->getOperand(1);
+ if ((N1.getOpcode() == ISD::FP_EXTEND ||
+ N1.getOpcode() == ISD::FP_ROUND)) {
+ // Do not optimize out type conversion of f128 type yet.
+    // For some targets like x86_64, the configuration keeps an f128 value in a
+    // single SSE register, but instruction selection cannot yet handle
+    // FCOPYSIGN on SSE registers.
+ EVT N1VT = N1->getValueType(0);
+ EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
+ return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+ }
+ return false;
+}
+
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// copysign(x, fp_extend(y)) -> copysign(x, y)
// copysign(x, fp_round(y)) -> copysign(x, y)
- if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+ if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
N0, N1.getOperand(0));
APFloat CVal = CFP1->getValueAPF();
CVal.changeSign();
if (Level >= AfterLegalizeDAG &&
- (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
- TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
+ (TLI.isFPImmLegal(CVal, VT) ||
+ TLI.isOperationLegal(ISD::ConstantFP, VT)))
return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
DAG.getNode(ISD::FNEG, SDLoc(N), VT,
N0.getOperand(1)),
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0));
+ return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
}
- if (N0CFP) {
- EVT VT = N->getValueType(0);
- // Canonicalize to constant on RHS.
+ // Canonicalize to constant on RHS.
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
- }
return SDValue();
}
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0));
+ return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
}
- if (N0CFP) {
- EVT VT = N->getValueType(0);
- // Canonicalize to constant on RHS.
+ // Canonicalize to constant on RHS.
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
- }
return SDValue();
}
};
} // namespace
+// This is a helper function for visitMUL to check the profitability
+// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+// MulNode is the original multiply, AddNode is (add x, c1),
+// and ConstNode is c2.
+//
+// If the (add x, c1) has multiple uses, we could increase
+// the number of adds if we make this transformation.
+// It would only be worth doing this if we can remove a
+// multiply in the process. Check for that here.
+// To illustrate:
+// (A + c1) * c3
+// (A + c2) * c3
+// We're checking for cases where we have common "c3 * A" expressions.
+bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
+ SDValue &AddNode,
+ SDValue &ConstNode) {
+ APInt Val;
+
+ // If the add only has one use, this would be OK to do.
+ if (AddNode.getNode()->hasOneUse())
+ return true;
+
+ // Walk all the users of the constant with which we're multiplying.
+ for (SDNode *Use : ConstNode->uses()) {
+
+ if (Use == MulNode) // This use is the one we're on right now. Skip it.
+ continue;
+
+ if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
+ SDNode *OtherOp;
+ SDNode *MulVar = AddNode.getOperand(0).getNode();
+
+ // OtherOp is what we're multiplying against the constant.
+ if (Use->getOperand(0) == ConstNode)
+ OtherOp = Use->getOperand(1).getNode();
+ else
+ OtherOp = Use->getOperand(0).getNode();
+
+ // Check to see if multiply is with the same operand of our "add".
+ //
+ // ConstNode = CONST
+ // Use = ConstNode * A <-- visiting Use. OtherOp is A.
+ // ...
+ // AddNode = (A + c1) <-- MulVar is A.
+ // = AddNode * ConstNode <-- current visiting instruction.
+ //
+ // If we make this transformation, we will have a common
+ // multiply (ConstNode * A) that we can save.
+ if (OtherOp == MulVar)
+ return true;
+
+ // Now check to see if a future expansion will give us a common
+ // multiply.
+ //
+ // ConstNode = CONST
+ // AddNode = (A + c1)
+ // ... = AddNode * ConstNode <-- current visiting instruction.
+ // ...
+ // OtherOp = (A + c2)
+ // Use = OtherOp * ConstNode <-- visiting Use.
+ //
+ // If we make this transformation, we will have a common
+ // multiply (CONST * A) after we also do the same transformation
+      // to the "Use" instruction.
+ if (OtherOp->getOpcode() == ISD::ADD &&
+ isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
+ OtherOp->getOperand(0).getNode() == MulVar)
+ return true;
+ }
+ }
+
+ // Didn't find a case where this would be profitable.
+ return false;
+}
+
SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG,
SDLoc SL,
ArrayRef<MemOpLink> Stores,
SDValue Chain = St->getChain();
for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
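+      // Only consider uses where this chain feeds the store's chain operand
+      // (operand 0).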
+ if (I.getOperandNo() != 0)
+ continue;
if (OtherST->isVolatile() || OtherST->isIndexed())
continue;
if (Index->getMemoryVT() != MemVT)
break;
+ // We do not allow under-aligned stores in order to prevent
+ // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
+ // be irrelevant here; what MATTERS is that we not move memory
+    // operations that potentially overlap past each other.
+ if (Index->getAlignment() < MemVT.getStoreSize())
+ break;
+
// We found a potential memory operand to merge.
StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
if (StoreNodes.size() < 2)
return false;
- // Sort the memory operands according to their distance from the base pointer.
+ // Sort the memory operands according to their distance from the
+  // base pointer. As a secondary criterion, make sure stores coming
+ // later in the code come first in the list. This is important for
+ // the non-UseAA case, because we're merging stores into the FINAL
+ // store along a chain which potentially contains aliasing stores.
+ // Thus, if there are multiple stores to the same address, the last
+ // one can be considered for merging but not the others.
std::sort(StoreNodes.begin(), StoreNodes.end(),
[](MemOpLink LHS, MemOpLink RHS) {
return LHS.OffsetFromBase < RHS.OffsetFromBase ||
(LHS.OffsetFromBase == RHS.OffsetFromBase &&
- LHS.SequenceNum > RHS.SequenceNum);
+ LHS.SequenceNum < RHS.SequenceNum);
});
// Scan the memory operations on the chain and find the first non-consecutive
break;
}
- bool Alias = false;
// Check if this store interferes with any of the loads that we found.
- for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
- if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
- Alias = true;
- break;
- }
- // We found a load that alias with this store. Stop the sequence.
- if (Alias)
+    // If we find a load that aliases with this store, stop the sequence.
+ if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(),
+ [&](LSBaseSDNode* Ldn) {
+ return isAlias(Ldn, StoreNodes[i].MemNode);
+ }))
break;
// Mark this node as useful.
StartAddress = LoadNodes[0].OffsetFromBase;
SDValue FirstChain = FirstLoad->getChain();
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
- // All loads much share the same chain.
+ // All loads must share the same chain.
if (LoadNodes[i].MemNode->getChain() != FirstChain)
break;
SDLoc LoadDL(LoadNodes[0].MemNode);
SDLoc StoreDL(StoreNodes[0].MemNode);
- // The merged loads are required to have the same chain, so using the first's
- // chain is acceptable.
+ // The merged loads are required to have the same incoming chain, so
+ // using the first's chain is acceptable.
SDValue NewLoad = DAG.getLoad(
JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
- // Replace one of the loads with the new load.
- LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
- SDValue(NewLoad.getNode(), 1));
-
- // Remove the rest of the load chains.
- for (unsigned i = 1; i < NumElem ; ++i) {
- // Replace all chain users of the old load nodes with the chain of the new
- // load node.
+ // Transfer chain users from old loads to the new load.
+ for (unsigned i = 0; i < NumElem; ++i) {
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
}
// Replace the last store with the new store.
}
SDValue EltNo = N->getOperand(1);
- bool ConstEltNo = isa<ConstantSDNode>(EltNo);
+ ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+
+ // extract_vector_elt (build_vector x, y), 1 -> y
+ if (ConstEltNo &&
+ InVec.getOpcode() == ISD::BUILD_VECTOR &&
+ TLI.isTypeLegal(VT) &&
+ (InVec.hasOneUse() ||
+ TLI.aggressivelyPreferBuildVectorSources(VT))) {
+ SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
+ EVT InEltVT = Elt.getValueType();
+
+ // Sometimes build_vector's scalar input types do not match result type.
+ if (NVT == InEltVT)
+ return Elt;
+
+    // TODO: It may be useful to truncate, if free, when the build_vector
+    // implicitly converts.
+ }
// Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because
// patterns. For example on AVX, extracting elements from a wide vector
// without using extract_subvector. However, if we can find an underlying
// scalar value, then we can always use that.
- if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
- && ConstEltNo) {
- int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
int NumElem = VT.getVectorNumElements();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
// Find the new index to extract from.
- int OrigElt = SVOp->getMaskElt(Elt);
+ int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
// Extracting an undef index is undef.
if (OrigElt == -1)
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
+ SDValue Ops[] = {LHS, RHS};
- // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
- // this operation.
- if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
- RHS.getOpcode() == ISD::BUILD_VECTOR) {
- // Check if both vectors are constants. If not bail out.
- if (!(cast<BuildVectorSDNode>(LHS)->isConstant() &&
- cast<BuildVectorSDNode>(RHS)->isConstant()))
- return SDValue();
-
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
- SDValue LHSOp = LHS.getOperand(i);
- SDValue RHSOp = RHS.getOperand(i);
-
- // Can't fold divide by zero.
- if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
- N->getOpcode() == ISD::FDIV) {
- if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP &&
- cast<ConstantFPSDNode>(RHSOp.getNode())->isZero()))
- break;
- }
-
- EVT VT = LHSOp.getValueType();
- EVT RVT = RHSOp.getValueType();
- EVT ST = VT;
-
- if (RVT.getSizeInBits() < VT.getSizeInBits())
- ST = RVT;
-
- // Integer BUILD_VECTOR operands may have types larger than the element
- // size (e.g., when the element type is not legal). Prior to type
- // legalization, the types may not match between the two BUILD_VECTORS.
- // Truncate the operands to make them match.
- if (VT.getSizeInBits() != LHS.getValueType().getScalarSizeInBits()) {
- EVT ScalarT = LHS.getValueType().getScalarType();
- LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), ScalarT, LHSOp);
- VT = LHSOp.getValueType();
- }
- if (RVT.getSizeInBits() != RHS.getValueType().getScalarSizeInBits()) {
- EVT ScalarT = RHS.getValueType().getScalarType();
- RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), ScalarT, RHSOp);
- RVT = RHSOp.getValueType();
- }
-
- SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT,
- LHSOp, RHSOp, N->getFlags());
-
- // We need the resulting constant to be legal if we are in a phase after
- // legalization, so zero extend to the smallest operand type if required.
- if (ST != VT && Level != BeforeLegalizeTypes)
- FoldOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LHS), ST, FoldOp);
-
- if (FoldOp.getOpcode() != ISD::UNDEF &&
- FoldOp.getOpcode() != ISD::Constant &&
- FoldOp.getOpcode() != ISD::ConstantFP)
- break;
- Ops.push_back(FoldOp);
- AddToWorklist(FoldOp.getNode());
- }
-
- if (Ops.size() == LHS.getNumOperands())
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
- }
+ // See if we can constant fold the vector operation.
+ if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
+ N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
+ return Fold;
// Try to convert a constant mask AND into a shuffle clear mask.
if (SDValue Shuffle = XformToShuffleWithZero(N))
SDValue Chain = Chains.pop_back_val();
// For TokenFactor nodes, look at each operand and only continue up the
- // chain until we find two aliases. If we've seen two aliases, assume we'll
- // find more and revert to original chain since the xform is unlikely to be
- // profitable.
+ // chain until we reach the depth limit.
//
// FIXME: The depth check could be made to return the last non-aliasing
// chain we found before we hit a tokenfactor rather than the original
// chain.
- if (Depth > 6 || Aliases.size() == 2) {
+ if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
Aliases.clear();
Aliases.push_back(OriginalChain);
return;