bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
EVT MemVT, unsigned NumElem,
bool IsConstantSrc, bool UseVector);
-
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
}
SDValue TargetLowering::DAGCombinerInfo::
-CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
+CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (add c1, c2) -> c1+c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
// canonicalize constant to RHS
SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
SDLoc(N), MVT::Glue));
// canonicalize constant to RHS.
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
// canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
N1, N0, CarryIn);
SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
- dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
EVT VT = N0.getValueType();
// fold vector ops
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
// fold (sub c1, c2) -> c1-c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
// fold (sub x, c) -> (add x, -c)
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
// fold C2-(A+C1) -> (C2-C1)-A
+ ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
+ dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
VT);
SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an SUB.
MVT::Glue));
// fold (subc x, 0) -> x + no borrow
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N1C && N1C->isNullValue())
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold vector ops
}
// fold (sdiv c1, c2) -> c1/c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
// fold (sdiv X, 1) -> X
SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold vector ops
}
// fold (udiv c1, c2) -> c1/c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
// fold (udiv x, (1 << c)) -> x >>u c
SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold (srem c1, c2) -> c1%c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
// If we know the sign bits of both operands are zero, strength reduce to a
SDValue DAGCombiner::visitUREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = isConstOrConstSplat(N0);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold (urem c1, c2) -> c1%c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
// fold (urem x, pow2) -> (and x, pow2-1)
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LL, LR, RL, RR, CC0, CC1;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N1.getValueType();
- unsigned BitWidth = VT.getScalarType().getSizeInBits();
// fold vector ops
if (VT.isVector()) {
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// fold (and c1, c2) -> c1&c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
// canonicalize constant to RHS
if (N1C && N1C->isAllOnesValue())
return N0;
// if (and x, c) is known to be zero, return 0
+ unsigned BitWidth = VT.getScalarType().getSizeInBits();
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
return DAG.getConstant(0, VT);
}
}
// fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LL, LR, RL, RR, CC0, CC1;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N1.getValueType();
// fold vector ops
return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
}
// fold (or c1, c2) -> c1|c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
// canonicalize constant to RHS
}
}
// fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- SDValue LHS, RHS, CC;
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
// fold vector ops
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (xor c1, c2) -> c1^c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
// canonicalize constant to RHS
return RXOR;
// fold !(x cc y) -> (x !cc y)
+ SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
bool isInt = LHS.getValueType().isInteger();
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (shl c1, c2) -> c1<<c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
// fold (shl 0, x) -> 0
SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (sra c1, c2) -> (sra c1, c2)
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
// fold (sra 0, x) -> 0
SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
}
// fold (srl c1, c2) -> c1 >>u c2
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
// fold (srl 0, x) -> 0
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
EVT VT = N->getValueType(0);
EVT VT0 = N0.getValueType();
if (N1 == N2)
return N1;
// fold (select true, X, Y) -> X
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C && !N0C->isNullValue())
return N1;
// fold (select false, X, Y) -> Y
if (N0C && N0C->isNullValue())
return N2;
// fold (select C, 1, X) -> (or C, X)
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select C, 0, 1) -> (xor C, 1)
// undiscoverable (or not reasonably discoverable). For example, it could be
// in another basic block or it could require searching a complicated
// expression.
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
if (VT.isInteger() &&
(VT0 == MVT::i1 || (VT0.isInteger() &&
TLI.getBooleanContents(false, false) ==
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(),
+ getMachineMemOperand(MST->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, MST->getAAInfo(), MST->getRanges());
DAG.getConstant(IncrementSize, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(),
+ getMachineMemOperand(MST->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, MST->getAAInfo(),
MST->getRanges());
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
+ getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
DAG.getConstant(IncrementSize, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
+ getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}
+// Attempt different variants of (fadd (fmul a, b), c) -> fma or fmad.
+//
+// FusedOpcode is the opcode of the fused node to create; the callers in this
+// file pass ISD::FMA or ISD::FMAD.  When Aggressive is set, the one-use
+// restriction on the multiply is dropped and the nested reassociation folds
+// below are attempted as well.  N is the FADD node being combined.  TLI is
+// currently unused.
+//
+// Returns the fused node, or an empty SDValue if no pattern matched.
+static SDValue performFaddFmulCombines(unsigned FusedOpcode,
+                                       bool Aggressive,
+                                       SDNode *N,
+                                       const TargetLowering &TLI,
+                                       SelectionDAG &DAG) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+
+  SDLoc SL(N);
+
+  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+  if (N0.getOpcode() == ISD::FMUL &&
+      (Aggressive || N0->hasOneUse())) {
+    return DAG.getNode(FusedOpcode, SL, VT,
+                       N0.getOperand(0), N0.getOperand(1), N1);
+  }
+
+  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+  // Note: Commutes FADD operands.
+  if (N1.getOpcode() == ISD::FMUL &&
+      (Aggressive || N1->hasOneUse())) {
+    return DAG.getNode(FusedOpcode, SL, VT,
+                       N1.getOperand(0), N1.getOperand(1), N0);
+  }
+
+  // More folding opportunities when target permits.
+  if (Aggressive) {
+    // The nested folds rebuild both the outer and the inner node with
+    // FusedOpcode, so the node being reassociated must itself be a
+    // FusedOpcode node.  Matching a hard-coded ISD::FMA here would turn an
+    // existing FMA into FMAD nodes when FusedOpcode is ISD::FMAD.  This
+    // mirrors the checks in performFsubFmulCombines.
+
+    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
+    if (N0.getOpcode() == FusedOpcode &&
+        N0.getOperand(2).getOpcode() == ISD::FMUL) {
+      return DAG.getNode(FusedOpcode, SL, VT,
+                         N0.getOperand(0), N0.getOperand(1),
+                         DAG.getNode(FusedOpcode, SL, VT,
+                                     N0.getOperand(2).getOperand(0),
+                                     N0.getOperand(2).getOperand(1),
+                                     N1));
+    }
+
+    // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
+    if (N1.getOpcode() == FusedOpcode &&
+        N1.getOperand(2).getOpcode() == ISD::FMUL) {
+      return DAG.getNode(FusedOpcode, SL, VT,
+                         N1.getOperand(0), N1.getOperand(1),
+                         DAG.getNode(FusedOpcode, SL, VT,
+                                     N1.getOperand(2).getOperand(0),
+                                     N1.getOperand(2).getOperand(1),
+                                     N0));
+    }
+  }
+
+  return SDValue();
+}
+
+// Attempt different variants of (fsub (fmul a, b), c) -> fma or fmad.
+//
+// FusedOpcode is the opcode of the fused node to create; the callers in this
+// file pass ISD::FMA or ISD::FMAD.  When Aggressive is set, the one-use
+// restrictions on the multiply are dropped and the nested reassociation folds
+// at the end are attempted as well.  N is the FSUB node being combined.  TLI
+// is currently unused.
+//
+// Returns the fused node, or an empty SDValue if no pattern matched.
+static SDValue performFsubFmulCombines(unsigned FusedOpcode,
+                                       bool Aggressive,
+                                       SDNode *N,
+                                       const TargetLowering &TLI,
+                                       SelectionDAG &DAG) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  // Wraps V in an FNEG of the result type.
+  auto Negate = [&](SDValue V) {
+    return DAG.getNode(ISD::FNEG, DL, VT, V);
+  };
+
+  // (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+  if (LHS.getOpcode() == ISD::FMUL && (Aggressive || LHS->hasOneUse())) {
+    SDValue NegZ = Negate(RHS);
+    return DAG.getNode(FusedOpcode, DL, VT,
+                       LHS.getOperand(0), LHS.getOperand(1), NegZ);
+  }
+
+  // (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+  // The FSUB operands swap roles here.
+  if (RHS.getOpcode() == ISD::FMUL && (Aggressive || RHS->hasOneUse())) {
+    SDValue NegY = Negate(RHS.getOperand(0));
+    return DAG.getNode(FusedOpcode, DL, VT, NegY, RHS.getOperand(1), LHS);
+  }
+
+  // (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+  // Unless aggressive, both the FNEG and the FMUL beneath it must have a
+  // single use.
+  if (LHS.getOpcode() == ISD::FNEG &&
+      LHS.getOperand(0).getOpcode() == ISD::FMUL) {
+    SDValue Mul = LHS.getOperand(0);
+    if (Aggressive || (LHS->hasOneUse() && Mul.hasOneUse())) {
+      SDValue NegX = Negate(Mul.getOperand(0));
+      SDValue NegZ = Negate(RHS);
+      return DAG.getNode(FusedOpcode, DL, VT, NegX, Mul.getOperand(1), NegZ);
+    }
+  }
+
+  // The remaining folds are only tried when the target permits aggressive
+  // fusion.
+  if (!Aggressive)
+    return SDValue();
+
+  // (fsub (fma x, y, (fmul u, v)), z)
+  //   -> (fma x, y, (fma u, v, (fneg z)))
+  if (LHS.getOpcode() == FusedOpcode &&
+      LHS.getOperand(2).getOpcode() == ISD::FMUL) {
+    SDValue InnerMul = LHS.getOperand(2);
+    SDValue NegZ = Negate(RHS);
+    SDValue Inner = DAG.getNode(FusedOpcode, DL, VT,
+                                InnerMul.getOperand(0),
+                                InnerMul.getOperand(1), NegZ);
+    return DAG.getNode(FusedOpcode, DL, VT,
+                       LHS.getOperand(0), LHS.getOperand(1), Inner);
+  }
+
+  // (fsub x, (fma y, z, (fmul u, v)))
+  //   -> (fma (fneg y), z, (fma (fneg u), v, x))
+  if (RHS.getOpcode() == FusedOpcode &&
+      RHS.getOperand(2).getOpcode() == ISD::FMUL) {
+    SDValue InnerMul = RHS.getOperand(2);
+    SDValue NegY = Negate(RHS.getOperand(0));
+    SDValue NegU = Negate(InnerMul.getOperand(0));
+    SDValue Inner = DAG.getNode(FusedOpcode, DL, VT,
+                                NegU, InnerMul.getOperand(1), LHS);
+    return DAG.getNode(FusedOpcode, DL, VT, NegY, RHS.getOperand(1), Inner);
+  }
+
+  return SDValue();
+}
+
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
}
} // enable-unsafe-fp-math
+ if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Assume if there is an fmad instruction that it should be aggressively
+ // used.
+ if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, N, TLI, DAG))
+ return Fused;
+ }
+
// FADD -> FMA combines:
if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
- // fold (fadd (fmul x, y), z) -> (fma x, y, z)
- if (N0.getOpcode() == ISD::FMUL &&
- (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1), N1);
-
- // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
- // Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(0), N1.getOperand(1), N0);
+ if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Don't form FMA if we are preferring FMAD.
+ if (SDValue Fused
+ = performFaddFmulCombines(ISD::FMA,
+ TLI.enableAggressiveFMAFusion(VT),
+ N, TLI, DAG)) {
+ return Fused;
+ }
+ }
// When FP_EXTEND nodes are free on the target, and there is an opportunity
// to combine into FMA, arrange such nodes accordingly.
N10.getOperand(1)), N0);
}
}
-
- // More folding opportunities when target permits.
- if (TLI.enableAggressiveFMAFusion(VT)) {
-
- // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
- if (N0.getOpcode() == ISD::FMA &&
- N0.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- N1));
-
- // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
- if (N1->getOpcode() == ISD::FMA &&
- N1.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(0), N1.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N1.getOperand(2).getOperand(0),
- N1.getOperand(2).getOperand(1),
- N0));
- }
}
return SDValue();
}
}
+ if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Assume if there is an fmad instruction that it should be aggressively
+ // used.
+ if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, N, TLI, DAG))
+ return Fused;
+ }
+
// FSUB -> FMA combines:
if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
- // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
- if (N0.getOpcode() == ISD::FMUL &&
- (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, dl, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
-
- // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
- // Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- N1.getOperand(0)),
- N1.getOperand(1), N0);
-
- // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
- if (N0.getOpcode() == ISD::FNEG &&
- N0.getOperand(0).getOpcode() == ISD::FMUL &&
- ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
- TLI.enableAggressiveFMAFusion(VT))) {
- SDValue N00 = N0.getOperand(0).getOperand(0);
- SDValue N01 = N0.getOperand(0).getOperand(1);
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
- DAG.getNode(ISD::FNEG, dl, VT, N1));
+ if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
+ // Don't form FMA if we are preferring FMAD.
+
+ if (SDValue Fused
+ = performFsubFmulCombines(ISD::FMA,
+ TLI.enableAggressiveFMAFusion(VT),
+ N, TLI, DAG)) {
+ return Fused;
+ }
}
// When FP_EXTEND nodes are free on the target, and there is an opportunity
// to combine into FMA, arrange such nodes accordingly.
if (TLI.isFPExtFree(VT)) {
-
// fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
}
}
}
-
- // More folding opportunities when target permits.
- if (TLI.enableAggressiveFMAFusion(VT)) {
-
- // fold (fsub (fma x, y, (fmul u, v)), z)
- // -> (fma x, y (fma u, v, (fneg z)))
- if (N0.getOpcode() == ISD::FMA &&
- N0.getOperand(2).getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N1)));
-
- // fold (fsub x, (fma y, z, (fmul u, v)))
- // -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (N1.getOpcode() == ISD::FMA &&
- N1.getOperand(2).getOpcode() == ISD::FMUL) {
- SDValue N20 = N1.getOperand(2).getOperand(0);
- SDValue N21 = N1.getOperand(2).getOperand(1);
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N1.getOperand(0)),
- N1.getOperand(1),
- DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N20),
- N21, N0));
- }
- }
}
return SDValue();
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
if (N0C &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
// Make sure we have something to merge.
if (NumElem < 2)
return false;
-
+
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned EarliestNodeUsed = 0;
-
+
for (unsigned i=0; i < NumElem; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
EarliestNodeUsed = i;
}
-
+
// The earliest Node in the DAG.
LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
-
+
SDValue StoredVal;
if (UseVector) {
// Find a legal type for the vector store.
return false;
Ops.push_back(Val);
}
-
+
// Build the extracted vector elements back into a vector.
StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
}
unsigned StoreBW = NumElem * ElementSizeBytes * 8;
APInt StoreInt(StoreBW, 0);
-
+
// Construct a single integer constant which is made of the smaller
// constant inputs.
bool IsLE = TLI.isLittleEndian();
llvm_unreachable("Invalid constant element type");
}
}
-
+
// Create the new Load and Store operations.
EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
StoredVal = DAG.getConstant(StoreInt, StoreTy);
}
-
+
SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
false, false,
FirstInChain->getAlignment());
-
+
// Replace the first store with the new store
CombineTo(EarliestOp, NewStore);
// Erase all other stores.
DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
deleteAndRecombine(St);
}
-
+
return true;
}
bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
isa<ConstantFPSDNode>(StoredVal);
bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
-
+
if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
return false;
// consecutive loads).
if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return false;
-
+
// Find a legal type for the vector store.
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
if (TLI.isTypeLegal(Ty))
if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
return SDValue();
+ // Just because the floating-point vector type is legal does not necessarily
+ // mean that the corresponding integer vector type is.
+ if (!isTypeLegal(NVT))
+ return SDValue();
+
SmallVector<SDValue, 8> Opnds;
for (unsigned i = 0; i != NumInScalars; ++i) {
SDValue In = N->getOperand(i);
if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
return SDValue();
-
+
// Try to replace VecIn1 with two extract_subvectors
// No need to update the masks, they should still be correct.
- VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+ VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy()));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
DAG.getConstant(0, TLI.getVectorIdxTy()));
}
}
+ // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
+ // We have already tested above for an UNDEF only concatenation.
// fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
// -> (BUILD_VECTOR A, B, ..., C, D, ...)
- if (N->getNumOperands() == 2 &&
- N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
- N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
- EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
+ auto IsBuildVectorOrUndef = [](const SDValue &Op) {
+ return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
+ };
+ bool AllBuildVectorsOrUndefs =
+ std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
+ if (AllBuildVectorsOrUndefs) {
SmallVector<SDValue, 8> Opnds;
- unsigned BuildVecNumElts = N0.getNumOperands();
-
- EVT SclTy0 = N0.getOperand(0)->getValueType(0);
- EVT SclTy1 = N1.getOperand(0)->getValueType(0);
- if (SclTy0.isFloatingPoint()) {
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N0.getOperand(i));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N1.getOperand(i));
- } else {
+ EVT SVT = VT.getScalarType();
+
+ EVT MinVT = SVT;
+ if (!SVT.isFloatingPoint()) {
// If BUILD_VECTOR are from built from integer, they may have different
- // operand types. Get the smaller type and truncate all operands to it.
- EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N0.getOperand(i)));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
- N1.getOperand(i)));
+ // operand types. Get the smallest type and truncate all operands to it.
+ bool FoundMinVT = false;
+ for (const SDValue &Op : N->ops())
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ EVT OpSVT = Op.getOperand(0)->getValueType(0);
+ MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
+ FoundMinVT = true;
+ }
+ assert(FoundMinVT && "Concat vector type mismatch");
}
+ for (const SDValue &Op : N->ops()) {
+ EVT OpVT = Op.getValueType();
+ unsigned NumElts = OpVT.getVectorNumElements();
+
+ if (ISD::UNDEF == Op.getOpcode())
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(DAG.getUNDEF(MinVT));
+
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ if (SVT.isFloatingPoint()) {
+ assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(Op.getOperand(i));
+ } else {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
+ }
+ }
+ }
+
+ assert(VT.getVectorNumElements() == Opnds.size() &&
+ "Concat vector type mismatch");
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
if (AllSame)
return N0;
- // If the splatted element is a constant, just build the vector out of
- // constants directly.
+ // Canonicalize any other splat as a build_vector.
const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
- if (isa<ConstantSDNode>(Splatted) || isa<ConstantFPSDNode>(Splatted)) {
- SmallVector<SDValue, 8> Ops(NumElts, Splatted);
- SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
- V->getValueType(0), Ops);
-
- // We may have jumped through bitcasts, so the type of the
- // BUILD_VECTOR may not match the type of the shuffle.
- if (V->getValueType(0) != VT)
- NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
- return NewBV;
- }
+ SmallVector<SDValue, 8> Ops(NumElts, Splatted);
+ SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
+ V->getValueType(0), Ops);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (V->getValueType(0) != VT)
+ NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
+ return NewBV;
}
}
if (!TLI.isShuffleMaskLegal(Mask, VT))
return SDValue();
-
+
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
return SDValue();
}
- // Let's see if the target supports this vector_shuffle.
+ // Let's see if the target supports this vector_shuffle and make sure
+ // we're not running after operation legalization where it may have
+ // custom lowered the vector shuffles.
EVT RVT = RHS.getValueType();
- if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ if (LegalOperations || !TLI.isVectorClearMaskLegal(Indices, RVT))
return SDValue();
// Return the new VECTOR_SHUFFLE node.