X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=lib%2FCodeGen%2FSelectionDAG%2FDAGCombiner.cpp;h=0437f59b4fefccb14930006c41b569eb66832469;hb=41cda401577bdc8b2d357199671dd6e038f0c83a;hp=92164e925435612952bd9631b330d8167f834920;hpb=1a50a12b432e59a76651895a147ef852fc121f45;p=oota-llvm.git diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 92164e92543..0437f59b4fe 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -385,7 +385,7 @@ namespace { bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl &StoreNodes, EVT MemVT, unsigned NumElem, bool IsConstantSrc, bool UseVector); - + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -466,7 +466,7 @@ void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { } SDValue TargetLowering::DAGCombinerInfo:: -CombineTo(SDNode *N, const std::vector &To, bool AddTo) { +CombineTo(SDNode *N, ArrayRef To, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); } @@ -1565,8 +1565,6 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); // fold vector ops @@ -1587,6 +1585,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (add c1, c2) -> c1+c2 + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); // canonicalize constant to RHS @@ -1738,8 +1738,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue DAGCombiner::visitADDC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. @@ -1749,6 +1747,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { SDLoc(N), MVT::Glue)); // canonicalize constant to RHS. + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); @@ -1780,10 +1780,10 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CarryIn = N->getOperand(2); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); // canonicalize constant to RHS + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), N1, N0, CarryIn); @@ -1810,10 +1810,6 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0.getNode()); - ConstantSDNode *N1C = dyn_cast(N1.getNode()); - ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : - dyn_cast(N1.getOperand(1).getNode()); EVT VT = N0.getValueType(); // fold vector ops @@ -1831,6 +1827,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // fold (sub c1, c2) -> c1-c2 + ConstantSDNode *N0C = dyn_cast(N0.getNode()); + ConstantSDNode *N1C = dyn_cast(N1.getNode()); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) @@ -1850,6 +1848,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) return N0.getOperand(0); // fold C2-(A+C1) -> (C2-C1)-A + ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : + dyn_cast(N1.getOperand(1).getNode()); if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), VT); @@ -1914,8 +1914,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue DAGCombiner::visitSUBC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an SUB. @@ -1931,6 +1929,8 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { MVT::Glue)); // fold (subc x, 0) -> x + no borrow + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); if (N1C && N1C->isNullValue()) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); @@ -2079,8 +2079,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops @@ -2090,6 +2088,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { } // fold (sdiv c1, c2) -> c1/c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isNullValue()) return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); // fold (sdiv X, 1) -> X @@ -2169,8 +2169,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops @@ -2180,6 +2178,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { } // fold (udiv c1, c2) -> c1/c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isNullValue()) return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); // fold (udiv x, (1 << c)) -> x >>u c @@ -2221,11 +2221,11 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (srem c1, c2) -> c1%c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isNullValue()) return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); // If we know the sign bits of both operands are zero, strength reduce to a @@ -2263,11 +2263,11 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (urem c1, c2) -> c1%c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isNullValue()) return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); // fold (urem x, pow2) -> (and x, pow2-1) @@ -2688,11 +2688,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LL, LR, RL, RR, CC0, CC1; - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N1.getValueType(); - unsigned BitWidth = VT.getScalarType().getSizeInBits(); // fold vector ops if (VT.isVector()) { @@ -2724,6 +2720,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) return DAG.getConstant(0, VT); // fold (and c1, c2) -> c1&c2 + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); // canonicalize constant to RHS @@ -2733,6 +2731,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N1C && N1C->isAllOnesValue()) return N0; // if (and x, c) is known to be zero, return 0 + unsigned BitWidth = VT.getScalarType().getSizeInBits(); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(BitWidth))) return DAG.getConstant(0, VT); @@ -2865,6 +2864,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ ISD::CondCode Op0 = cast(CC0)->get(); ISD::CondCode Op1 = cast(CC1)->get(); @@ -3341,9 +3341,6 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LL, LR, RL, RR, CC0, CC1; - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N1.getValueType(); // fold vector ops @@ -3435,6 +3432,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); } // fold (or c1, c2) -> c1|c2 + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); // canonicalize constant to RHS @@ -3476,6 +3475,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ ISD::CondCode Op0 = cast(CC0)->get(); ISD::CondCode Op1 = cast(CC1)->get(); @@ -3828,9 +3828,6 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LHS, RHS, CC; - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); // fold vector ops @@ -3854,6 +3851,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (xor c1, c2) -> c1^c2 + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); // canonicalize constant to RHS @@ -3868,6 +3867,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return RXOR; // fold !(x cc y) -> (x !cc y) + SDValue LHS, RHS, CC; if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { bool isInt = LHS.getValueType().isInteger(); ISD::CondCode NotCC = ISD::getSetCCInverse(cast(CC)->get(), @@ -4077,12 +4077,11 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast(N1); if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; @@ -4109,6 +4108,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } // fold (shl c1, c2) -> c1<(N0); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); // fold (shl 0, x) -> 0 @@ -4257,12 +4257,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast(N1); if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; @@ -4271,6 +4270,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } // fold (sra c1, c2) -> (sra c1, c2) + ConstantSDNode *N0C = dyn_cast(N0); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); // fold (sra 0, x) -> 0 @@ -4403,12 +4403,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast(N1); if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; @@ -4417,6 +4416,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } // fold (srl c1, c2) -> c1 >>u c2 + ConstantSDNode *N0C = dyn_cast(N0); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); // fold (srl 0, x) -> 0 @@ -4686,9 +4686,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); - ConstantSDNode *N2C = dyn_cast(N2); EVT VT = N->getValueType(0); EVT VT0 = N0.getValueType(); @@ -4696,12 +4693,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (N1 == N2) return N1; // fold (select true, X, Y) -> X + ConstantSDNode *N0C = dyn_cast(N0); if (N0C && !N0C->isNullValue()) return N1; // fold (select false, X, Y) -> Y if (N0C && N0C->isNullValue()) return N2; // fold (select C, 1, X) -> (or C, X) + ConstantSDNode *N1C = dyn_cast(N1); if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) @@ -4713,6 +4712,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // undiscoverable (or not reasonably discoverable). For example, it could be // in another basic block or it could require searching a complicated // expression. + ConstantSDNode *N2C = dyn_cast(N2); if (VT.isInteger() && (VT0 == MVT::i1 || (VT0.isInteger() && TLI.getBooleanContents(false, false) == @@ -4913,7 +4913,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), + getMachineMemOperand(MST->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, MST->getAAInfo(), MST->getRanges()); @@ -4925,7 +4925,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { DAG.getConstant(IncrementSize, Ptr.getValueType())); MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), + getMachineMemOperand(MST->getPointerInfo(), MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, MST->getAAInfo(), MST->getRanges()); @@ -4988,7 +4988,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), + getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); @@ -5000,7 +5000,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { DAG.getConstant(IncrementSize, Ptr.getValueType())); MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), + getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); @@ -6938,6 +6938,133 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); } +// Attempt different variants of (fadd (fmul a, b), c) -> fma or fmad +static SDValue performFaddFmulCombines(unsigned FusedOpcode, + bool Aggressive, + SDNode *N, + const TargetLowering &TLI, + SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (N0.getOpcode() == ISD::FMUL && + (Aggressive || N0->hasOneUse())) { + return DAG.getNode(FusedOpcode, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), N1); + } + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FMUL && + (Aggressive || N1->hasOneUse())) { + return DAG.getNode(FusedOpcode, SDLoc(N), VT, + N1.getOperand(0), N1.getOperand(1), N0); + } + + // More folding opportunities when target permits. + if (Aggressive) { + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(FusedOpcode, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(FusedOpcode, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + N1)); + } + + // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) + if (N1->getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(FusedOpcode, SDLoc(N), VT, + N1.getOperand(0), N1.getOperand(1), + DAG.getNode(FusedOpcode, SDLoc(N), VT, + N1.getOperand(2).getOperand(0), + N1.getOperand(2).getOperand(1), + N0)); + } + } + + return SDValue(); +} + +static SDValue performFsubFmulCombines(unsigned FusedOpcode, + bool Aggressive, + SDNode *N, + const TargetLowering &TLI, + SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + + SDLoc SL(N); + + // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) + if (N0.getOpcode() == ISD::FMUL && + (Aggressive || N0->hasOneUse())) { + return DAG.getNode(FusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FMUL && + (Aggressive || N1->hasOneUse())) + return DAG.getNode(FusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + N1.getOperand(0)), + N1.getOperand(1), N0); + + // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && + N0.getOperand(0).getOpcode() == ISD::FMUL && + (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) { + SDValue N00 = N0.getOperand(0).getOperand(0); + SDValue N01 = N0.getOperand(0).getOperand(1); + return DAG.getNode(FusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N00), N01, + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // More folding opportunities when target permits. + if (Aggressive) { + // fold (fsub (fma x, y, (fmul u, v)), z) + // -> (fma x, y (fma u, v, (fneg z))) + if (N0.getOpcode() == FusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(FusedOpcode, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(FusedOpcode, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1))); + } + + // fold (fsub x, (fma y, z, (fmul u, v))) + // -> (fma (fneg y), z, (fma (fneg u), v, x)) + if (N1.getOpcode() == FusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + SDValue N20 = N1.getOperand(2).getOperand(0); + SDValue N21 = N1.getOperand(2).getOperand(1); + return DAG.getNode(FusedOpcode, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(FusedOpcode, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N20), + N21, N0)); + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -7077,23 +7204,27 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } // enable-unsafe-fp-math + if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) { + // Assume if there is an fmad instruction that it should be aggressively + // used. + if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, N, TLI, DAG)) + return Fused; + } + // FADD -> FMA combines: if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && TLI.isFMAFasterThanFMulAndFAdd(VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { - // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && - (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), N1); - - // fold (fadd x, (fmul y, z)) -> (fma y, z, x) - // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FMUL && - (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(0), N1.getOperand(1), N0); + if (!TLI.isOperationLegal(ISD::FMAD, VT)) { + // Don't form FMA if we are preferring FMAD. + if (SDValue Fused + = performFaddFmulCombines(ISD::FMA, + TLI.enableAggressiveFMAFusion(VT), + N, TLI, DAG)) { + return Fused; + } + } // When FP_EXTEND nodes are free on the target, and there is an opportunity // to combine into FMA, arrange such nodes accordingly. @@ -7122,30 +7253,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N10.getOperand(1)), N0); } } - - // More folding opportunities when target permits. - if (TLI.enableAggressiveFMAFusion(VT)) { - - // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - if (N0.getOpcode() == ISD::FMA && - N0.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(2).getOperand(0), - N0.getOperand(2).getOperand(1), - N1)); - - // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - if (N1->getOpcode() == ISD::FMA && - N1.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(0), N1.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N1.getOperand(2).getOperand(0), - N1.getOperand(2).getOperand(1), - N0)); - } } return SDValue(); @@ -7207,43 +7314,32 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } + if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) { + // Assume if there is an fmad instruction that it should be aggressively + // used. + if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, N, TLI, DAG)) + return Fused; + } + // FSUB -> FMA combines: if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && TLI.isFMAFasterThanFMulAndFAdd(VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { - // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && - (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, dl, VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FNEG, dl, VT, N1)); - - // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) - // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FMUL && - (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, - N1.getOperand(0)), - N1.getOperand(1), N0); - - // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) - if (N0.getOpcode() == ISD::FNEG && - N0.getOperand(0).getOpcode() == ISD::FMUL && - ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) || - TLI.enableAggressiveFMAFusion(VT))) { - SDValue N00 = N0.getOperand(0).getOperand(0); - SDValue N01 = N0.getOperand(0).getOperand(1); - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, N00), N01, - DAG.getNode(ISD::FNEG, dl, VT, N1)); + if (!TLI.isOperationLegal(ISD::FMAD, VT)) { + // Don't form FMA if we are preferring FMAD. + + if (SDValue Fused + = performFsubFmulCombines(ISD::FMA, + TLI.enableAggressiveFMAFusion(VT), + N, TLI, DAG)) { + return Fused; + } } // When FP_EXTEND nodes are free on the target, and there is an opportunity // to combine into FMA, arrange such nodes accordingly. if (TLI.isFPExtFree(VT)) { - // fold (fsub (fpext (fmul x, y)), z) // -> (fma (fpext x), (fpext y), (fneg z)) if (N0.getOpcode() == ISD::FP_EXTEND) { @@ -7308,38 +7404,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } } - - // More folding opportunities when target permits. - if (TLI.enableAggressiveFMAFusion(VT)) { - - // fold (fsub (fma x, y, (fmul u, v)), z) - // -> (fma x, y (fma u, v, (fneg z))) - if (N0.getOpcode() == ISD::FMA && - N0.getOperand(2).getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(2).getOperand(0), - N0.getOperand(2).getOperand(1), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N1))); - - // fold (fsub x, (fma y, z, (fmul u, v))) - // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (N1.getOpcode() == ISD::FMA && - N1.getOperand(2).getOpcode() == ISD::FMUL) { - SDValue N20 = N1.getOperand(2).getOperand(0); - SDValue N21 = N1.getOperand(2).getOperand(1); - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N1.getOperand(0)), - N1.getOperand(1), - DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N20), - N21, N0)); - } - } } return SDValue(); @@ -7753,11 +7817,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantSDNode *N0C = dyn_cast(N0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp + ConstantSDNode *N0C = dyn_cast(N0); if (N0C && // ...but only if the target supports immediate floating-point values (!LegalOperations || @@ -7806,11 +7870,11 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantSDNode *N0C = dyn_cast(N0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp + ConstantSDNode *N0C = dyn_cast(N0); if (N0C && // ...but only if the target supports immediate floating-point values (!LegalOperations || @@ -9885,11 +9949,11 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // Make sure we have something to merge. if (NumElem < 2) return false; - + int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned EarliestNodeUsed = 0; - + for (unsigned i=0; i < NumElem; ++i) { // Find a chain for the new wide-store operand. Notice that some // of the store nodes that we found may not be selected for inclusion @@ -9898,11 +9962,11 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) EarliestNodeUsed = i; } - + // The earliest Node in the DAG. LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; SDLoc DL(StoreNodes[0].MemNode); - + SDValue StoredVal; if (UseVector) { // Find a legal type for the vector store. @@ -9925,7 +9989,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( return false; Ops.push_back(Val); } - + // Build the extracted vector elements back into a vector. StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops); } @@ -9936,7 +10000,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( unsigned StoreBW = NumElem * ElementSizeBytes * 8; APInt StoreInt(StoreBW, 0); - + // Construct a single integer constant which is made of the smaller // constant inputs. bool IsLE = TLI.isLittleEndian(); @@ -9953,18 +10017,18 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( llvm_unreachable("Invalid constant element type"); } } - + // Create the new Load and Store operations. EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); StoredVal = DAG.getConstant(StoreInt, StoreTy); } - + SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstInChain->getAlignment()); - + // Replace the first store with the new store CombineTo(EarliestOp, NewStore); // Erase all other stores. @@ -9986,7 +10050,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); deleteAndRecombine(St); } - + return true; } @@ -10007,7 +10071,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { bool IsConstantSrc = isa(StoredVal) || isa(StoredVal); bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); - + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) return false; @@ -10217,7 +10281,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // consecutive loads). if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return false; - + // Find a legal type for the vector store. EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); if (TLI.isTypeLegal(Ty)) @@ -11130,6 +11194,11 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { if (!TLI.isOperationLegalOrCustom(Opcode, NVT)) return SDValue(); + // Just because the floating-point vector type is legal does not necessarily + // mean that the corresponding integer vector type is. + if (!isTypeLegal(NVT)) + return SDValue(); + SmallVector Opnds; for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); @@ -11285,10 +11354,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) return SDValue(); - + // Try to replace VecIn1 with two extract_subvectors // No need to update the masks, they should still be correct. - VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy())); VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, DAG.getConstant(0, TLI.getVectorIdxTy())); @@ -11361,36 +11430,56 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } } + // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR. + // We have already tested above for an UNDEF only concatenation. // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) // -> (BUILD_VECTOR A, B, ..., C, D, ...) - if (N->getNumOperands() == 2 && - N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR && - N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) { - EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); + auto IsBuildVectorOrUndef = [](const SDValue &Op) { + return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); + }; + bool AllBuildVectorsOrUndefs = + std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef); + if (AllBuildVectorsOrUndefs) { SmallVector Opnds; - unsigned BuildVecNumElts = N0.getNumOperands(); - - EVT SclTy0 = N0.getOperand(0)->getValueType(0); - EVT SclTy1 = N1.getOperand(0)->getValueType(0); - if (SclTy0.isFloatingPoint()) { - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N0.getOperand(i)); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N1.getOperand(i)); - } else { + EVT SVT = VT.getScalarType(); + + EVT MinVT = SVT; + if (!SVT.isFloatingPoint()) { // If BUILD_VECTOR are from built from integer, they may have different - // operand types. Get the smaller type and truncate all operands to it. - EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1; - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, - N0.getOperand(i))); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, - N1.getOperand(i))); + // operand types. Get the smallest type and truncate all operands to it. + bool FoundMinVT = false; + for (const SDValue &Op : N->ops()) + if (ISD::BUILD_VECTOR == Op.getOpcode()) { + EVT OpSVT = Op.getOperand(0)->getValueType(0); + MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT; + FoundMinVT = true; + } + assert(FoundMinVT && "Concat vector type mismatch"); } + for (const SDValue &Op : N->ops()) { + EVT OpVT = Op.getValueType(); + unsigned NumElts = OpVT.getVectorNumElements(); + + if (ISD::UNDEF == Op.getOpcode()) + for (unsigned i = 0; i != NumElts; ++i) + Opnds.push_back(DAG.getUNDEF(MinVT)); + + if (ISD::BUILD_VECTOR == Op.getOpcode()) { + if (SVT.isFloatingPoint()) { + assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); + for (unsigned i = 0; i != NumElts; ++i) + Opnds.push_back(Op.getOperand(i)); + } else { + for (unsigned i = 0; i != NumElts; ++i) + Opnds.push_back( + DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i))); + } + } + } + + assert(VT.getVectorNumElements() == Opnds.size() && + "Concat vector type mismatch"); return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } @@ -11749,20 +11838,17 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (AllSame) return N0; - // If the splatted element is a constant, just build the vector out of - // constants directly. + // Canonicalize any other splat as a build_vector. const SDValue &Splatted = V->getOperand(SVN->getSplatIndex()); - if (isa(Splatted) || isa(Splatted)) { - SmallVector Ops(NumElts, Splatted); - SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - V->getValueType(0), Ops); - - // We may have jumped through bitcasts, so the type of the - // BUILD_VECTOR may not match the type of the shuffle. - if (V->getValueType(0) != VT) - NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); - return NewBV; - } + SmallVector Ops(NumElts, Splatted); + SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), + V->getValueType(0), Ops); + + // We may have jumped through bitcasts, so the type of the + // BUILD_VECTOR may not match the type of the shuffle. + if (V->getValueType(0) != VT) + NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); + return NewBV; } } @@ -11905,7 +11991,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (!TLI.isShuffleMaskLegal(Mask, VT)) return SDValue(); - + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) @@ -11976,9 +12062,11 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { return SDValue(); } - // Let's see if the target supports this vector_shuffle. + // Let's see if the target supports this vector_shuffle and make sure + // we're not running after operation legalization where it may have + // custom lowered the vector shuffles. EVT RVT = RHS.getValueType(); - if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + if (LegalOperations || !TLI.isVectorClearMaskLegal(Indices, RVT)) return SDValue(); // Return the new VECTOR_SHUFFLE node.