SDValue visitFADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
SDValue visitFMUL(SDNode *N);
+ SDValue visitFMA(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFCOPYSIGN(SDNode *N);
SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
SDValue visitFABS(SDNode *N);
+ SDValue visitFCEIL(SDNode *N);
+ SDValue visitFTRUNC(SDNode *N);
+ SDValue visitFFLOOR(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
class WorkListRemover : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
public:
- explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
+ explicit WorkListRemover(DAGCombiner &dc) // Forwards node deletions to dc's worklist.
+ : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} // NOTE(review): presumably the base ctor registers this listener with the DAG -- confirm.
virtual void NodeDeleted(SDNode *N, SDNode *E) {
DC.removeFromWorkList(N);
}
-
- virtual void NodeUpdated(SDNode *N) {
- // Ignore updates.
- }
};
}
N->getValueType(i) == To[i].getValueType()) &&
"Cannot combine value to value of different type!"));
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
-
+ DAG.ReplaceAllUsesWith(N, To);
if (AddTo) {
// Push the new nodes and any users onto the worklist
for (unsigned i = 0, e = NumTo; i != e; ++i) {
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
// Push the new node and any (possibly new) users onto the worklist.
AddToWorkList(TLO.New.getNode());
Trunc.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
removeFromWorkList(Load);
DAG.DeleteNode(Load);
AddToWorkList(Trunc.getNode());
Result.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
removeFromWorkList(N);
DAG.DeleteNode(N);
AddToWorkList(Result.getNode());
DAG.TransferDbgValues(SDValue(N, 0), RV);
WorkListRemover DeadNodes(*this);
if (N->getNumValues() == RV.getNode()->getNumValues())
- DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
+ DAG.ReplaceAllUsesWith(N, RV.getNode());
else {
assert(N->getValueType(0) == RV.getValueType() &&
N->getNumValues() == 1 && "Type mismatch");
SDValue OpV = RV;
- DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes);
+ DAG.ReplaceAllUsesWith(N, &OpV);
}
// Push the new node and any users onto the worklist
// If the root changed (e.g. it was a dead load, update the root).
DAG.setRoot(Dummy.getValue());
+ DAG.RemoveDeadNodes();
}
SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FADD: return visitFADD(N);
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
+ case ISD::FMA: return visitFMA(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
+ case ISD::FFLOOR: return visitFFLOOR(N);
+ case ISD::FCEIL: return visitFCEIL(N);
+ case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
case ISD::BR_CC: return visitBR_CC(N);
case ISD::LOAD: return visitLOAD(N);
// Replacing results may cause a different MERGE_VALUES to suddenly
// be CSE'd with N, and carry its uses with it. Iterate until no
// uses remain, to ensure that the node can be safely deleted.
+ // First add the users of this node to the work list so that they
+ // can be tried again once they have new operands.
+ AddUsersToWorkList(N);
do {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
} while (!N->use_empty());
removeFromWorkList(N);
DAG.DeleteNode(N);
if (VT.isInteger() && !VT.isVector()) {
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+ DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+ DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
- (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
}
}
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+ DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+ DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
- (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
N->getDebugLoc(), MVT::Glue));
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
- N1.getOperand(0));
+ N1.getOperand(0));
}
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
ORNode, N0.getOperand(1));
}
+ // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
+ // Only perform this optimization after type legalization and before
+ // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
+ // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
+ // we don't want to undo this promotion.
+ // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
+ // on scalars.
+ if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ && Level == AfterLegalizeTypes) {
+ SDValue In0 = N0.getOperand(0);
+ SDValue In1 = N1.getOperand(0);
+ EVT In0Ty = In0.getValueType();
+ EVT In1Ty = In1.getValueType();
+ // If both incoming values are integers, and the original types are the same.
+ if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
+ SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
+ SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
+ return BC;
+ }
+ }
+
+ // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
+ // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
+ // If both shuffles use the same mask, and both shuffle within a single
+ // vector, then it is worthwhile to move the swizzle after the operation.
+ // The type-legalizer generates this pattern when loading illegal
+ // vector types from memory. In many cases this allows additional shuffle
+ // optimizations.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N0.getOperand(1).getOpcode() == ISD::UNDEF &&
+ N1.getOperand(1).getOpcode() == ISD::UNDEF) {
+ ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
+ ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
+
+ assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() &&
+ "Inputs to shuffles are not the same type");
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Check that both shuffles use the same mask. The masks are known to be of
+ // the same length because the result vector type is the same.
+ bool SameMask = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx0 = SVN0->getMaskElt(i);
+ int Idx1 = SVN1->getMaskElt(i);
+ if (Idx0 != Idx1) {
+ SameMask = false;
+ break;
+ }
+ }
+
+ if (SameMask) {
+ SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(Op.getNode());
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
+ DAG.getUNDEF(VT), &SVN0->getMask()[0]);
+ }
+ }
+
return SDValue();
}
Load->getOffset(), Load->getMemoryVT(),
Load->getMemOperand());
// Replace uses of the EXTLOAD with the new ZEXTLOAD.
- CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ if (Load->getNumValues() == 3) {
+ // PRE/POST_INC loads have 3 values.
+ SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
+ NewLoad.getValue(2) };
+ CombineTo(Load, To, 3, true);
+ } else {
+ CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ }
}
// Fold the AND away, taking care not to fold to the old load node if we
}
}
+ if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
+ VT.getSizeInBits() <= 64) {
+ if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ APInt ADDC = ADDI->getAPIntValue();
+ if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
+ // immediate for an add, but it is legal if its top c2 bits are set,
+ // transform the ADD so the immediate doesn't need to be materialized
+ // in a register.
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ SRLI->getZExtValue());
+ if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
+ ADDC |= Mask;
+ if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDValue NewAdd =
+ DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, VT));
+ CombineTo(N0.getNode(), NewAdd);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+ }
+ }
+ }
+
+
return SDValue();
}
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
// If all of the bits input the to ctlz node are known to be zero, then
// the result of the ctlz is "32" and the result of the shift is one.
- APInt UnknownBits = ~KnownZero & Mask;
+ APInt UnknownBits = ~KnownZero;
if (UnknownBits == 0) return DAG.getConstant(1, VT);
// Otherwise, check to see if there is exactly one bit input to the ctlz.
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations) {
EVT N0VT = N0.getOperand(0).getValueType();
- // We know that the # elements of the results is the same as the
- // # elements of the compare (and the # elements of the compare result
- // for that matter). Check to see that they are the same size. If so,
- // we know that the element size of the sext'd result matches the
- // element size of the compare operands.
- if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ // On some architectures (such as SSE/NEON/etc) the SETCC result type is
+ // of the same size as the compared operands. Only optimize sext(setcc())
+ // if this is the case.
+ EVT SVT = TLI.getSetCCResultType(N0VT);
+
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == SVT.getSizeInBits())
return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
EVT MatchingVectorType =
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
- SDValue VsetCC =
- DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+
+ if (SVT == MatchingVectorType) {
+ SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
}
}
return SDValue();
}
+// isTruncateOf - Decide whether N behaves like a truncation of another value.
+// On success, returns true with Op set to the value being truncated and
+// KnownZero set to the bits of Op that ComputeMaskedBits reports as zero, so
+// that the caller does not have to call ComputeMaskedBits a second time.
+//
+// Two shapes are accepted:
+//   * an ISD::TRUNCATE node, whose operand becomes Op;
+//   * an i1-typed SETCC with condition SETNE comparing some X against a null
+//     constant (on either side), where every bit of X other than bit 0 is
+//     known to be zero.
+// Note that Op and KnownZero may already have been written when the second
+// shape is rejected by the known-bits test.
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
+                         APInt &KnownZero) {
+  // ComputeMaskedBits also reports known-one bits; they are not needed here.
+  APInt IgnoredKnownOne;
+  const unsigned Opcode = N->getOpcode();
+
+  // Shape 1: a plain truncate.
+  if (Opcode == ISD::TRUNCATE) {
+    Op = N->getOperand(0);
+    DAG.ComputeMaskedBits(Op, KnownZero, IgnoredKnownOne);
+    return true;
+  }
+
+  // Shape 2: only an i1 "not equal" comparison qualifies. The condition code
+  // operand is inspected only once N is known to be an i1 SETCC.
+  const bool IsI1SetNE =
+    Opcode == ISD::SETCC && N->getValueType(0) == MVT::i1 &&
+    cast<CondCodeSDNode>(N->getOperand(2))->get() == ISD::SETNE;
+  if (!IsI1SetNE)
+    return false;
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  assert(LHS.getValueType() == RHS.getValueType());
+
+  // One side must be the constant zero; the other side is the candidate Op.
+  // The left-hand side is checked first.
+  ConstantSDNode *LHSC = dyn_cast<ConstantSDNode>(LHS);
+  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
+  if (LHSC && LHSC->isNullValue())
+    Op = RHS;
+  else if (RHSC && RHSC->isNullValue())
+    Op = LHS;
+  else
+    return false;
+
+  DAG.ComputeMaskedBits(Op, KnownZero, IgnoredKnownOne);
+
+  // Accept only if all bits of Op except possibly bit 0 are known zero.
+  return (KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue();
+}
+
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// (zext (truncate x)) -> (truncate x)
// This is valid when the truncated bits of x are already zero.
// FIXME: We should extend this to work for vectors too.
- if (N0.getOpcode() == ISD::TRUNCATE && !VT.isVector()) {
- SDValue Op = N0.getOperand(0);
- APInt TruncatedBits
- = APInt::getBitsSet(Op.getValueSizeInBits(),
- N0.getValueSizeInBits(),
- std::min(Op.getValueSizeInBits(),
- VT.getSizeInBits()));
- APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(Op, TruncatedBits, KnownZero, KnownOne);
- if (TruncatedBits == KnownZero) {
+ SDValue Op;
+ APInt KnownZero;
+ if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
+ APInt TruncatedBits =
+ (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
+ APInt(Op.getValueSizeInBits(), 0) :
+ APInt::getBitsSet(Op.getValueSizeInBits(),
+ N0.getValueSizeInBits(),
+ std::min(Op.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (TruncatedBits == (KnownZero & TruncatedBits)) {
if (VT.bitsGT(Op.getValueType()))
return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op);
if (VT.bitsLT(Op.getValueType()))
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
} else if (Op.getValueType().bitsGT(VT)) {
Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
}
return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
N0.getValueType().getScalarType());
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT PtrType = N0.getOperand(1).getValueType();
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ // It's not possible to generate a constant of extended or untyped type.
+ return SDValue();
+
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (TLI.isBigEndian()) {
// Replace the old load's chain with the new load's chain.
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
// Shift the result left, if we've swallowed a left shift.
SDValue Result = Load;
SDValue EltNo = N0->getOperand(1);
if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
-
+ EVT IndexTy = N0->getOperand(1).getValueType();
int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
N->getDebugLoc(), TrTy, V,
- DAG.getConstant(Index, MVT::i32));
+ DAG.getConstant(Index, IndexTy));
}
}
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
// This often reduces constant pool loads.
- if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
+ if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) ||
+ (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) &&
N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
N0.getOperand(0));
if (FoldedVOp.getNode()) return FoldedVOp;
}
- // fold (fadd c1, c2) -> (fadd c1, c2)
+ // fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP && VT != MVT::ppcf128)
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
// canonicalize constant to RHS
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
+ // FADD -> FMA combines:
+ if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ DAG.getTarget().Options.UnsafeFPMath) &&
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+ N1.getOperand(0), N1.getOperand(1), N0);
+ }
+ }
+
return SDValue();
}
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
// fold vector ops
if (VT.isVector()) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+ return DAG.getNode(ISD::FNEG, dl, VT, N1);
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
- return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+ return DAG.getNode(ISD::FADD, dl, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// If 'unsafe math' is enabled, fold
+ // (fsub x, x) -> 0.0 &
// (fsub x, (fadd x, y)) -> (fneg y) &
// (fsub x, (fadd y, x)) -> (fneg y)
if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (N0 == N1)
+ return DAG.getConstantFP(0.0f, VT);
+
if (N1.getOpcode() == ISD::FADD) {
SDValue N10 = N1->getOperand(0);
SDValue N11 = N1->getOperand(1);
}
}
+ // FSUB -> FMA combines:
+ if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ DAG.getTarget().Options.UnsafeFPMath) &&
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
+
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1), N0);
+ }
+
+ // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG &&
+ N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(ISD::FMA, dl, VT,
+ DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, dl, VT, N1));
+ }
+ }
+
return SDValue();
}
if (DAG.getTarget().Options.UnsafeFPMath &&
ISD::isBuildVectorAllZeros(N1.getNode()))
return N1;
+ // fold (fmul A, 1.0) -> A
+ if (N1CFP && N1CFP->isExactlyValue(1.0))
+ return N0;
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
return SDValue();
}
+/// visitFMA - Combine an ISD::FMA node N with operands (a, b, c).
+/// Returns the replacement value, or a null SDValue when nothing applies.
+SDValue DAGCombiner::visitFMA(SDNode *N) {
+  SDValue MulLHS = N->getOperand(0);
+  SDValue MulRHS = N->getOperand(1);
+  SDValue Addend = N->getOperand(2);
+  EVT VT = N->getValueType(0);
+  ConstantFPSDNode *LHSConst = dyn_cast<ConstantFPSDNode>(MulLHS);
+  ConstantFPSDNode *RHSConst = dyn_cast<ConstantFPSDNode>(MulRHS);
+
+  // fold (fma 1.0, x, y) -> (fadd x, y)
+  if (LHSConst && LHSConst->isExactlyValue(1.0))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, MulRHS, Addend);
+
+  // fold (fma x, 1.0, y) -> (fadd x, y)
+  if (RHSConst && RHSConst->isExactlyValue(1.0))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, MulLHS, Addend);
+
+  // Nothing left to do unless the first operand is a constant and the second
+  // one is not.
+  if (!LHSConst || RHSConst)
+    return SDValue();
+
+  // Canonicalize (fma c, x, y) -> (fma x, c, y)
+  return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, MulRHS, MulLHS, Addend);
+}
+
SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (N0CFP && N1CFP && VT != MVT::ppcf128)
return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+ // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
+ if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) {
+ // Compute the reciprocal 1.0 / c2.
+ APFloat N1APF = N1CFP->getValueAPF();
+ APFloat Recip(N1APF.getSemantics(), 1); // 1.0
+ APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+ // Only do the transform if the reciprocal is a legal fp immediate that
+ // isn't too nasty (eg NaN, denormal, ...).
+ if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
+ (!LegalOperations ||
+ // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+ // backend)... we should handle this gracefully after Legalize.
+ // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(Recip, VT)))
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0,
+ DAG.getConstantFP(Recip, VT));
+ }
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
}
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ // (See also visitSELECT)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
+ !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+
+ // fold (sint_to_fp (zext (setcc x, y, cc))) ->
+ // (select_cc x, y, 1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
+ DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
+ N0.getOperand(0).getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
return SDValue();
}
return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
}
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ // (See also visitSELECT)
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
+
+ if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5);
+ }
+ }
+
return SDValue();
}
// Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BITCAST &&
+ if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
!VT.isVector() &&
N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger()) {
return SDValue();
}
+/// visitFCEIL - Combine an ISD::FCEIL node N.
+/// Returns the replacement value, or a null SDValue when nothing applies.
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  SDValue Src = N->getOperand(0);
+
+  // Only a floating-point constant operand is handled, and never for ppcf128.
+  if (VT == MVT::ppcf128 || !dyn_cast<ConstantFPSDNode>(Src))
+    return SDValue();
+
+  // fold (fceil c1) -> fceil(c1)
+  // NOTE(review): presumably DAG.getNode constant-folds this request; confirm.
+  return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, Src);
+}
+
+/// visitFTRUNC - Combine an ISD::FTRUNC node N.
+/// Returns the replacement value, or a null SDValue when nothing applies.
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  SDValue Src = N->getOperand(0);
+
+  // Only a floating-point constant operand is handled, and never for ppcf128.
+  if (VT == MVT::ppcf128 || !dyn_cast<ConstantFPSDNode>(Src))
+    return SDValue();
+
+  // fold (ftrunc c1) -> ftrunc(c1)
+  // NOTE(review): presumably DAG.getNode constant-folds this request; confirm.
+  return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, Src);
+}
+
+/// visitFFLOOR - Combine an ISD::FFLOOR node N.
+/// Returns the replacement value, or a null SDValue when nothing applies.
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  SDValue Src = N->getOperand(0);
+
+  // Only a floating-point constant operand is handled, and never for ppcf128.
+  if (VT == MVT::ppcf128 || !dyn_cast<ConstantFPSDNode>(Src))
+    return SDValue();
+
+  // fold (ffloor c1) -> ffloor(c1)
+  // NOTE(review): presumably DAG.getNode constant-folds this request; confirm.
+  return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, Src);
+}
+
SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
// Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
+ if (!TLI.isFAbsFree(VT) &&
+ N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger() &&
!N0.getOperand(0).getValueType().isVector()) {
SDValue Int = N0.getOperand(0);
}
// Replace the uses of SRL with SETCC
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
removeFromWorkList(N1.getNode());
DAG.DeleteNode(N1.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
Tmp.getNode()->dump(&DAG);
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
removeFromWorkList(TheXor);
DAG.DeleteNode(TheXor);
return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
Equal ? ISD::SETEQ : ISD::SETNE);
// Replace the uses of XOR with SETCC
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
removeFromWorkList(N1.getNode());
DAG.DeleteNode(N1.getNode());
return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
if (isLoad) {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
- &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
}
// Finally, since the node is now dead, remove it from the graph.
DAG.DeleteNode(N);
// Replace the uses of Ptr with uses of the updated base value.
- DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
removeFromWorkList(Ptr.getNode());
DAG.DeleteNode(Ptr.getNode());
dbgs() << '\n');
WorkListRemover DeadNodes(*this);
if (isLoad) {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
- &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
}
// Finally, since the node is now dead, remove it from the graph.
// Replace the uses of Use with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
- Result.getValue(isLoad ? 1 : 0),
- &DeadNodes);
+ Result.getValue(isLoad ? 1 : 0));
removeFromWorkList(Op);
DAG.DeleteNode(Op);
return true;
Chain.getNode()->dump(&DAG);
dbgs() << "\n");
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
if (N->use_empty()) {
removeFromWorkList(N);
Undef.getNode()->dump(&DAG);
dbgs() << " and 2 other values\n");
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
- DAG.getUNDEF(N->getValueType(1)),
- &DeadNodes);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
+ DAG.getUNDEF(N->getValueType(1)));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
removeFromWorkList(N);
DAG.DeleteNode(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
AddToWorkList(NewLD.getNode());
AddToWorkList(NewVal.getNode());
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
++OpsNarrowed;
return NewST;
}
AddToWorkList(NewLD.getNode());
AddToWorkList(NewST.getNode());
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
- &DeadNodes);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
++LdStFP2Int;
return NewST;
}
SDValue Tmp;
switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unknown FP type");
- case MVT::f80: // We don't do this for these yet.
+ case MVT::f16: // We don't do this for these yet.
+ case MVT::f80:
case MVT::f128:
case MVT::ppcf128:
break;
OrigElt -= NumElem;
}
+ EVT IndexTy = N->getOperand(1).getValueType();
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
- InVec, DAG.getConstant(OrigElt, MVT::i32));
+ InVec, DAG.getConstant(OrigElt, IndexTy));
}
// Perform only after legalization to ensure build_vector / vector_shuffle
WorkListRemover DeadNodes(*this);
SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
SDValue To[] = { Load, Chain };
- DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes);
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
// Since we're explcitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
AddToWorkList(Load.getNode());
unsigned NumInScalars = N->getNumOperands();
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
+
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
// Check to see if this is a BUILD_VECTOR of a bunch of values
// which come from any_extend or zero_extend nodes. If so, we can create
// a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
// using shuffles.
EVT SourceType = MVT::Other;
bool AllAnyExt = true;
- bool AllUndef = true;
+
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
// Ignore undef inputs.
if (In.getOpcode() == ISD::UNDEF) continue;
- AllUndef = false;
bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
AllAnyExt &= AnyExt;
}
- if (AllUndef)
- return DAG.getUNDEF(VT);
-
// In order to have valid types, all of the inputs must be extended from the
// same source type and all of the inputs must be any or zero extend.
// Scalar sizes must be a power of two.
if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
return SDValue();
- // Widen the input vector by adding undef values.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
- VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ // If the element type of the input vector is not the same as
+ // the output element type, make concat_vectors based on input element
+ // type and then bitcast it to the output vector type.
+ //
+ // In other words, avoid nodes like this:
+ // <NODE> v16i8 = concat_vectors v4i16 v4i16
+ // Replace it with this one:
+ // <NODE0> v8i16 = concat_vectors v4i16 v4i16
+ // <NODE1> v16i8 = bitcast NODE0
+ EVT ItemType = VecIn1.getValueType().getVectorElementType();
+ if (ItemType != VT.getVectorElementType()) {
+ EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(),
+ ItemType,
+ VecIn1.getValueType().getVectorNumElements()*2);
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ VecIn1 = DAG.getNode(ISD::BITCAST, dl, VT, VecIn1);
+ } else
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+
}
// If VecIn2 is unused then change it to undef.
if (N->getNumOperands() == 1)
return N->getOperand(0);
+ // Check if all of the operands are undefs.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(N->getValueType(0));
+
return SDValue();
}
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- assert(N0.getValueType().getVectorNumElements() == NumElts &&
- "Vector shuffle must be normalized in DAG");
+ assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
// Canonicalize shuffle undef, undef -> undef
if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
SmallVector<int, 8> NewMask;
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
- if (Idx < 0)
- NewMask.push_back(Idx);
- else if (Idx < (int)NumElts)
- NewMask.push_back(Idx + NumElts);
- else
- NewMask.push_back(Idx - NumElts);
+ if (Idx >= 0) {
+ if (Idx < (int)NumElts)
+ Idx += NumElts;
+ else
+ Idx -= NumElts;
+ }
+ NewMask.push_back(Idx);
}
return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT),
&NewMask[0]);
return N0;
}
}
+
+ // If this shuffle node is simply a swizzle of another shuffle node,
+ // and it reverses the swizzle of the previous shuffle then we can
+ // optimize shuffle(shuffle(x, undef), undef) -> x.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N1.getOpcode() == ISD::UNDEF) {
+
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+ // Shuffle nodes can only reverse shuffles with a single non-undef value.
+ if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
+ return SDValue();
+
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ assert(Idx < (int)NumElts && "Index references undef operand");
+ // Next, this index comes from the first value, which is the incoming
+ // shuffle. Adopt the incoming index.
+ if (Idx >= 0)
+ Idx = OtherSV->getMaskElt(Idx);
+
+ // The combined shuffle must map each index to itself.
+ if (Idx >= 0 && (unsigned)Idx != i)
+ return SDValue();
+ }
+
+ return OtherSV->getOperand(0);
+ }
+
return SDValue();
}
SDValue Elt = RHS.getOperand(i);
if (!isa<ConstantSDNode>(Elt))
return SDValue();
- else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+
+ if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
Indices.push_back(i);
else if (cast<ConstantSDNode>(Elt)->isNullValue())
Indices.push_back(NumElts);