bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
EVT MemVT, unsigned NumElem,
bool IsConstantSrc, bool UseVector);
-
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
- AttributeSet FnAttrs =
- DAG.getMachineFunction().getFunction()->getAttributes();
- ForCodeSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ auto *F = DAG.getMachineFunction().getFunction();
+ ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) ||
+ F->hasFnAttribute(Attribute::MinSize);
}
/// Runs the dag combiner on all nodes in the work list
}
SDValue TargetLowering::DAGCombinerInfo::
-CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}
LegalTypes = Level >= AfterLegalizeTypes;
// Early exit if this basic block is in an optnone function.
- AttributeSet FnAttrs =
- DAG.getMachineFunction().getFunction()->getAttributes();
- if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeNone))
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::OptimizeNone))
return;
// Add all the dag nodes to the worklist.
switch (Op.getOpcode()) {
case ISD::EntryToken:
// Entry tokens don't need to be added to the list. They are
- // rededundant.
+ // redundant.
Changed = true;
break;
SDValue Result;
- // If we've change things around then replace token factor.
+ // If we've changed things around then replace token factor.
if (Changed) {
if (Ops.empty()) {
// The entry token is the only possible outcome.
Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
}
- // Don't add users to work list.
- return CombineTo(N, Result, false);
+ // Add users to worklist if AA is enabled, since it may introduce
+ // a lot of new chained token factors while removing memory deps.
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+ return CombineTo(N, Result, UseAA /*add to worklist*/);
}
return Result;
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (add c1, c2) -> c1+c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
// canonicalize constant to RHS
SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
SDLoc(N), MVT::Glue));
// canonicalize constant to RHS.
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
// canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
N1, N0, CarryIn);
SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
- dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
EVT VT = N0.getValueType();
// fold vector ops
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
// fold (sub c1, c2) -> c1-c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
// fold (sub x, c) -> (add x, -c)
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
// fold C2-(A+C1) -> (C2-C1)-A
+ ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
+ dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
VT);
SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an SUB.
MVT::Glue));
// fold (subc x, 0) -> x + no borrow
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N1C && N1C->isNullValue())
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold vector ops
}
// fold (sdiv c1, c2) -> c1/c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
// fold (sdiv X, 1) -> X
SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold vector ops
}
// fold (udiv c1, c2) -> c1/c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
// fold (udiv x, (1 << c)) -> x >>u c
SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold (srem c1, c2) -> c1%c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
// If we know the sign bits of both operands are zero, strength reduce to a
SDValue DAGCombiner::visitUREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold (urem c1, c2) -> c1%c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
// fold (urem x, pow2) -> (and x, pow2-1)
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LL, LR, RL, RR, CC0, CC1;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N1.getValueType();
- unsigned BitWidth = VT.getScalarType().getSizeInBits();
// fold vector ops
if (VT.isVector()) {
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// fold (and c1, c2) -> c1&c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
// canonicalize constant to RHS
if (N1C && N1C->isAllOnesValue())
return N0;
// if (and x, c) is known to be zero, return 0
+ unsigned BitWidth = VT.getScalarType().getSizeInBits();
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
return DAG.getConstant(0, VT);
}
}
// fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LL, LR, RL, RR, CC0, CC1;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N1.getValueType();
// fold vector ops
return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
}
// fold (or c1, c2) -> c1|c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
// canonicalize constant to RHS
}
}
// fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LHS, RHS, CC;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (xor c1, c2) -> c1^c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
// canonicalize constant to RHS
return RXOR;
// fold !(x cc y) -> (x !cc y)
+ SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
bool isInt = LHS.getValueType().isInteger();
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (shl c1, c2) -> c1<<c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
// fold (shl 0, x) -> 0
SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (sra c1, c2) -> (sra c1, c2)
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
// fold (sra 0, x) -> 0
SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (srl c1, c2) -> c1 >>u c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
// fold (srl 0, x) -> 0
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
EVT VT = N->getValueType(0);
EVT VT0 = N0.getValueType();
if (N1 == N2)
return N1;
// fold (select true, X, Y) -> X
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && !N0C->isNullValue())
return N1;
// fold (select false, X, Y) -> Y
if (N0C && N0C->isNullValue())
return N2;
// fold (select C, 1, X) -> (or C, X)
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select C, 0, 1) -> (xor C, 1)
// undiscoverable (or not reasonably discoverable). For example, it could be
// in another basic block or it could require searching a complicated
// expression.
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
if (VT.isInteger() &&
(VT0 == MVT::i1 || (VT0.isInteger() &&
TLI.getBooleanContents(false, false) ==
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(),
+ getMachineMemOperand(MST->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, MST->getAAInfo(), MST->getRanges());
DAG.getConstant(IncrementSize, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(),
+ getMachineMemOperand(MST->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, MST->getAAInfo(),
MST->getRanges());
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
+ getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
DAG.getConstant(IncrementSize, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
+ getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
- for (unsigned j = 0; j != NumOutputsPerInput; ++j)
- Ops.push_back(DAG.getUNDEF(DstEltVT));
+ Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
continue;
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}
+// Attempt different variants of (fadd (fmul a, b), c) -> fma or fmad
+static SDValue performFaddFmulCombines(unsigned FusedOpcode,
+ bool Aggressive,
+ SDNode *N,
+ const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (N0.getOpcode() == ISD::FMUL &&
+ (Aggressive || N0->hasOneUse())) {
+ return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FMUL &&
+ (Aggressive || N1->hasOneUse())) {
+ return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N1.getOperand(0), N1.getOperand(1), N0);
+ }
+
+ // More folding opportunities when target permits.
+ if (Aggressive) {
+ // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
+ if (N0.getOpcode() == ISD::FMA &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL) {
+ return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ N1));
+ }
+
+ // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
+ if (N1->getOpcode() == ISD::FMA &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N1.getOperand(0), N1.getOperand(1),
+ DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N1.getOperand(2).getOperand(0),
+ N1.getOperand(2).getOperand(1),
+ N0));
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue performFsubFmulCombines(unsigned FusedOpcode,
+ bool Aggressive,
+ SDNode *N,
+ const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ SDLoc SL(N);
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (N0.getOpcode() == ISD::FMUL &&
+ (Aggressive || N0->hasOneUse())) {
+ return DAG.getNode(FusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FMUL &&
+ (Aggressive || N1->hasOneUse()))
+ return DAG.getNode(FusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1), N0);
+
+ // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG &&
+ N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(FusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // More folding opportunities when target permits.
+ if (Aggressive) {
+ // fold (fsub (fma x, y, (fmul u, v)), z)
+ // -> (fma x, y (fma u, v, (fneg z)))
+ if (N0.getOpcode() == FusedOpcode &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL) {
+ return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N1)));
+ }
+
+ // fold (fsub x, (fma y, z, (fmul u, v)))
+ // -> (fma (fneg y), z, (fma (fneg u), v, x))
+ if (N1.getOpcode() == FusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ SDValue N20 = N1.getOperand(2).getOperand(0);
+ SDValue N21 = N1.getOperand(2).getOperand(1);
+ return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N20),
+ N21, N0));
+ }
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
}
} // enable-unsafe-fp-math
+ if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Assume if there is an fmad instruction that it should be aggressively
+ // used.
+ if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, N, TLI, DAG))
+ return Fused;
+ }
+
// FADD -> FMA combines:
if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
- // fold (fadd (fmul x, y), z) -> (fma x, y, z)
- if (N0.getOpcode() == ISD::FMUL &&
- (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1), N1);
-
- // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
- // Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(0), N1.getOperand(1), N0);
+ if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Don't form FMA if we are preferring FMAD.
+ if (SDValue Fused
+ = performFaddFmulCombines(ISD::FMA,
+ TLI.enableAggressiveFMAFusion(VT),
+ N, TLI, DAG)) {
+ return Fused;
+ }
+ }
// When FP_EXTEND nodes are free on the target, and there is an opportunity
// to combine into FMA, arrange such nodes accordingly.
N10.getOperand(1)), N0);
}
}
-
- // More folding opportunities when target permits.
- if (TLI.enableAggressiveFMAFusion(VT)) {
-
- // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
- if (N0.getOpcode() == ISD::FMA &&
- N0.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- N1));
-
- // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
- if (N1->getOpcode() == ISD::FMA &&
- N1.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(0), N1.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(2).getOperand(0),
- N1.getOperand(2).getOperand(1),
- N0));
- }
}
return SDValue();
}
}
+ if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Assume if there is an fmad instruction that it should be aggressively
+ // used.
+ if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, N, TLI, DAG))
+ return Fused;
+ }
+
// FSUB -> FMA combines:
if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
- // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
- if (N0.getOpcode() == ISD::FMUL &&
- (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, dl, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
-
- // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
- // Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- N1.getOperand(0)),
- N1.getOperand(1), N0);
-
- // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
- if (N0.getOpcode() == ISD::FNEG &&
- N0.getOperand(0).getOpcode() == ISD::FMUL &&
- ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
- TLI.enableAggressiveFMAFusion(VT))) {
- SDValue N00 = N0.getOperand(0).getOperand(0);
- SDValue N01 = N0.getOperand(0).getOperand(1);
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
- DAG.getNode(ISD::FNEG, dl, VT, N1));
+ if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Don't form FMA if we are preferring FMAD.
+
+ if (SDValue Fused
+ = performFsubFmulCombines(ISD::FMA,
+ TLI.enableAggressiveFMAFusion(VT),
+ N, TLI, DAG)) {
+ return Fused;
+ }
}
// When FP_EXTEND nodes are free on the target, and there is an opportunity
// to combine into FMA, arrange such nodes accordingly.
if (TLI.isFPExtFree(VT)) {
-
// fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
}
}
}
-
- // More folding opportunities when target permits.
- if (TLI.enableAggressiveFMAFusion(VT)) {
-
- // fold (fsub (fma x, y, (fmul u, v)), z)
- // -> (fma x, y (fma u, v, (fneg z)))
- if (N0.getOpcode() == ISD::FMA &&
- N0.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N1)));
-
- // fold (fsub x, (fma y, z, (fmul u, v)))
- // -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (N1.getOpcode() == ISD::FMA &&
- N1.getOperand(2).getOpcode() == ISD::FMUL) {
- SDValue N20 = N1.getOperand(2).getOperand(0);
- SDValue N21 = N1.getOperand(2).getOperand(1);
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N1.getOperand(0)),
- N1.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N20),
- N21, N0));
- }
- }
}
return SDValue();
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
return SDValue();
}
+// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
+static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
+ return SDValue();
+
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
+ bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ // We can safely assume the conversion won't overflow the output range,
+ // because (for example) (uint8_t)18293.f is undefined behavior.
+
+ // Since we can assume the conversion won't overflow, our decision as to
+ // whether the input will fit in the float should depend on the minimum
+ // of the input range and output range.
+
+ // This means this is also safe for a signed input and unsigned output, since
+ // a negative input would lead to undefined behavior.
+ unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
+ unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
+ unsigned ActualSize = std::min(InputSize, OutputSize);
+ const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
+
+ // We can only fold away the float conversion if the input range can be
+ // represented exactly in the float range.
+ if (APFloat::semanticsPrecision(sem) >= ActualSize) {
+ if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
+ unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
+ }
+ if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
+ if (SrcVT == VT)
+ return Src;
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src);
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
if (N0CFP)
return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
- return SDValue();
+ return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
if (N0CFP)
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
- return SDValue();
+ return FoldIntToFPToInt(N, DAG);
}
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
// fold (fp_round (fp_round x)) -> (fp_round x)
if (N0.getOpcode() == ISD::FP_ROUND) {
- // This is a value preserving truncation if both round's are.
- bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
- N0.getNode()->getConstantOperandVal(1) == 1;
- return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
- DAG.getIntPtrConstant(IsTrunc));
+ const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
+ const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
+ // If the first fp_round isn't a value preserving truncation, it might
+ // introduce a tie in the second fp_round, that wouldn't occur in the
+ // single-step fp_round we want to fold to.
+ // In other words, double rounding isn't the same as rounding.
+ // Also, this is a value preserving truncation iff both fp_round's are.
+ if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc));
}
// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
if (NotMaskLZ == 64) return Result; // All zero mask.
// See if we have a continuous run of bits. If so, we have 0*1+0*
- if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
+ if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
return Result;
// Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
// Make sure we have something to merge.
if (NumElem < 2)
return false;
-
+
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned EarliestNodeUsed = 0;
-
+
for (unsigned i=0; i < NumElem; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
EarliestNodeUsed = i;
}
-
+
// The earliest Node in the DAG.
LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
-
+
SDValue StoredVal;
if (UseVector) {
// Find a legal type for the vector store.
return false;
Ops.push_back(Val);
}
-
+
// Build the extracted vector elements back into a vector.
StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
}
unsigned StoreBW = NumElem * ElementSizeBytes * 8;
APInt StoreInt(StoreBW, 0);
-
+
// Construct a single integer constant which is made of the smaller
// constant inputs.
bool IsLE = TLI.isLittleEndian();
llvm_unreachable("Invalid constant element type");
}
}
-
+
// Create the new Load and Store operations.
EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
StoredVal = DAG.getConstant(StoreInt, StoreTy);
}
-
+
SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
false, false,
FirstInChain->getAlignment());
-
+
// Replace the first store with the new store
CombineTo(EarliestOp, NewStore);
// Erase all other stores.
DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
deleteAndRecombine(St);
}
-
+
return true;
}
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
EVT MemVT = St->getMemoryVT();
int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
- bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);
+ bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat);
// Don't merge vectors into wider inputs.
if (MemVT.isVector() || !MemVT.isSimple())
bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
isa<ConstantFPSDNode>(StoredVal);
bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
-
+
if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
return false;
// consecutive loads).
if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return false;
-
+
// Find a legal type for the vector store.
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
if (TLI.isTypeLegal(Ty))
if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
return SDValue();
+ // Just because the floating-point vector type is legal does not necessarily
+ // mean that the corresponding integer vector type is.
+ if (!isTypeLegal(NVT))
+ return SDValue();
+
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
return SDValue();
-
+
// Try to replace VecIn1 with two extract_subvectors
// No need to update the masks, they should still be correct.
- VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+ VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy()));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
DAG.getConstant(0, TLI.getVectorIdxTy()));
}
}
+ // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
+ // We have already tested above for an UNDEF only concatenation.
// fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
// -> (BUILD_VECTOR A, B, ..., C, D, ...)
- if (N->getNumOperands() == 2 &&
- N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
- N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
- EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
+ auto IsBuildVectorOrUndef = [](const SDValue &Op) {
+ return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
+ };
+ bool AllBuildVectorsOrUndefs =
+ std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
+ if (AllBuildVectorsOrUndefs) {
SmallVector<SDValue, 8> Opnds;
- unsigned BuildVecNumElts = N0.getNumOperands();
-
- EVT SclTy0 = N0.getOperand(0)->getValueType(0);
- EVT SclTy1 = N1.getOperand(0)->getValueType(0);
- if (SclTy0.isFloatingPoint()) {
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N0.getOperand(i));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N1.getOperand(i));
- } else {
+ EVT SVT = VT.getScalarType();
+
+ EVT MinVT = SVT;
+ if (!SVT.isFloatingPoint()) {
// If BUILD_VECTOR are from built from integer, they may have different
- // operand types. Get the smaller type and truncate all operands to it.
- EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N0.getOperand(i)));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N1.getOperand(i)));
+ // operand types. Get the smallest type and truncate all operands to it.
+ bool FoundMinVT = false;
+ for (const SDValue &Op : N->ops())
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ EVT OpSVT = Op.getOperand(0)->getValueType(0);
+ MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
+ FoundMinVT = true;
+ }
+ assert(FoundMinVT && "Concat vector type mismatch");
+ }
+
+ for (const SDValue &Op : N->ops()) {
+ EVT OpVT = Op.getValueType();
+ unsigned NumElts = OpVT.getVectorNumElements();
+
+ if (ISD::UNDEF == Op.getOpcode())
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(DAG.getUNDEF(MinVT));
+
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ if (SVT.isFloatingPoint()) {
+ assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(Op.getOperand(i));
+ } else {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
+ }
+ }
}
+ assert(VT.getVectorNumElements() == Opnds.size() &&
+ "Concat vector type mismatch");
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
if (AllSame)
return N0;
- // If the splatted element is a constant, just build the vector out of
- // constants directly.
+ // Canonicalize any other splat as a build_vector.
const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
- if (isa<ConstantSDNode>(Splatted) || isa<ConstantFPSDNode>(Splatted)) {
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0; i != NumElts; ++i) {
- Ops.push_back(Splatted);
- }
- SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
- V->getValueType(0), Ops);
-
- // We may have jumped through bitcasts, so the type of the
- // BUILD_VECTOR may not match the type of the shuffle.
- if (V->getValueType(0) != VT)
- NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
- return NewBV;
- }
+ SmallVector<SDValue, 8> Ops(NumElts, Splatted);
+ SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
+ V->getValueType(0), Ops);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (V->getValueType(0) != VT)
+ NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
+ return NewBV;
}
}
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
// Don't try to fold shuffles with illegal type.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
- TLI.isTypeLegal(VT)) {
+ // Only fold if this shuffle is the only user of the other shuffle.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
+ Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
// The incoming shuffle must be of the same type as the result of the
if (!TLI.isShuffleMaskLegal(Mask, VT))
return SDValue();
-
+
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
return SDValue();
}
- // Let's see if the target supports this vector_shuffle.
+ // Let's see if the target supports this vector_shuffle and make sure
+ // we're not running after operation legalization where it may have
+ // custom lowered the vector shuffles.
EVT RVT = RHS.getValueType();
- if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ if (LegalOperations || !TLI.isVectorClearMaskLegal(Indices, RVT))
return SDValue();
// Return the new VECTOR_SHUFFLE node.