X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FDAGCombiner.cpp;h=673c46102db488da76bfc5e72df968bc5b0be4c5;hb=dc4dccabf314946c8c1ff035bebc9015937d80b5;hp=f8f52339e8ca87725f0ce1c58d889222f5ba5116;hpb=ef67194fd21d6ec7767fdbe2c7774533f962f976;p=oota-llvm.git diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index f8f52339e8c..673c46102db 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -255,6 +255,7 @@ namespace { SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); SDValue visitRotate(SDNode *N); + SDValue visitBSWAP(SDNode *N); SDValue visitCTLZ(SDNode *N); SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); @@ -268,6 +269,7 @@ namespace { SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); + SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitTRUNCATE(SDNode *N); SDValue visitBITCAST(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); @@ -307,6 +309,9 @@ namespace { SDValue visitINSERT_SUBVECTOR(SDNode *N); SDValue visitMLOAD(SDNode *N); SDValue visitMSTORE(SDNode *N); + SDValue visitMGATHER(SDNode *N); + SDValue visitMSCATTER(SDNode *N); + SDValue visitFP_TO_FP16(SDNode *N); SDValue visitFADDForFMACombine(SDNode *N); SDValue visitFSUBForFMACombine(SDNode *N); @@ -383,6 +388,13 @@ namespace { unsigned SequenceNum; }; + /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a + /// constant build_vector of the stored constant values in Stores. + SDValue getMergedConstantVectorStore(SelectionDAG &DAG, + SDLoc SL, + ArrayRef Stores, + EVT Ty) const; + /// This is a helper function for MergeConsecutiveStores. When the source /// elements of the consecutive stores are all constants or all extracted /// vector elements, try to merge them into one larger store. @@ -391,6 +403,13 @@ namespace { EVT MemVT, unsigned NumElem, bool IsConstantSrc, bool UseVector); + /// This is a helper function for MergeConsecutiveStores. + /// Stores that may be merged are placed in StoreNodes. + /// Loads that may alias with those stores are placed in AliasLoadNodes. + void getStoreMergeAndAliasCandidates( + StoreSDNode* St, SmallVectorImpl &StoreNodes, + SmallVectorImpl &AliasLoadNodes); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -424,8 +443,8 @@ namespace { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); if (LHSTy.isVector()) return LHSTy; - return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) - : TLI.getPointerTy(); + auto &DL = DAG.getDataLayout(); + return LegalTypes ? TLI.getScalarShiftAmountTy(DL) : TLI.getPointerTy(DL); } /// This method returns true if we are running before type legalization or @@ -437,7 +456,7 @@ namespace { /// Convenience wrapper around TargetLowering::getSetCCResultType EVT getSetCCResultType(EVT VT) const { - return TLI.getSetCCResultType(*DAG.getContext(), VT); + return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } }; } @@ -591,7 +610,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, case ISD::ConstantFP: { APFloat V = cast(Op)->getValueAPF(); V.changeSign(); - return DAG.getConstantFP(V, Op.getValueType()); + return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); } case ISD::FADD: // FIXME: determine better conditions for this xform. @@ -615,7 +634,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, // fold (fneg (fsub 0, B)) -> B if (ConstantFPSDNode *N0CFP = dyn_cast(Op.getOperand(0))) - if (N0CFP->getValueAPF().isZero()) + if (N0CFP->isZero()) return Op.getOperand(1); // fold (fneg (fsub A, B)) -> (fsub B, A) @@ -773,7 +792,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R)) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); return SDValue(); } @@ -793,7 +812,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L)) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); return SDValue(); } @@ -1173,8 +1192,8 @@ bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { continue; if (N->use_empty()) { - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - Nodes.insert(N->getOperand(i).getNode()); + for (const SDValue &ChildN : N->op_values()) + Nodes.insert(ChildN.getNode()); removeFromWorklist(N); DAG.DeleteNode(N); @@ -1247,9 +1266,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // worklist as well. Because the worklist uniques things already, this // won't repeatedly process the same operand. CombinedNodes.insert(N); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (!CombinedNodes.count(N->getOperand(i).getNode())) - AddToWorklist(N->getOperand(i).getNode()); + for (const SDValue &ChildN : N->op_values()) + if (!CombinedNodes.count(ChildN.getNode())) + AddToWorklist(ChildN.getNode()); SDValue RV = combine(N); @@ -1331,6 +1350,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SRL: return visitSRL(N); case ISD::ROTR: case ISD::ROTL: return visitRotate(N); + case ISD::BSWAP: return visitBSWAP(N); case ISD::CTLZ: return visitCTLZ(N); case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); @@ -1344,6 +1364,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); + case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); @@ -1381,8 +1402,11 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); + case ISD::MGATHER: return visitMGATHER(N); case ISD::MLOAD: return visitMLOAD(N); + case ISD::MSCATTER: return visitMSCATTER(N); case ISD::MSTORE: return visitMSTORE(N); + case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); } return SDValue(); } @@ -1446,11 +1470,9 @@ SDValue DAGCombiner::combine(SDNode *N) { if (isa(N0) || !isa(N1)) { SDValue Ops[] = {N1, N0}; SDNode *CSENode; - if (const BinaryWithFlagsSDNode *BinNode = - dyn_cast(N)) { - CSENode = DAG.getNodeIfExists( - N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(), - BinNode->hasNoSignedWrap(), BinNode->isExact()); + if (const auto *BinNode = dyn_cast(N)) { + CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, + &BinNode->Flags); } else { CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); } @@ -1501,8 +1523,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { SDNode *TF = TFs[i]; // Check each of the operands. - for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) { - SDValue Op = TF->getOperand(i); + for (const SDValue &Op : TF->op_values()) { switch (Op.getOpcode()) { case ISD::EntryToken: @@ -1573,6 +1594,33 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } +static bool isNullConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast(V); + return Const != nullptr && Const->isNullValue(); +} + +static bool isNullFPConstant(SDValue V) { + ConstantFPSDNode *Const = dyn_cast(V); + return Const != nullptr && Const->isZero() && !Const->isNegative(); +} + +static bool isAllOnesConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast(V); + return Const != nullptr && Const->isAllOnesValue(); +} + +static bool isOneConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast(V); + return Const != nullptr && Const->isOne(); +} + +/// If \p N is a ContantSDNode with isOpaque() == false return it casted to a +/// ContantSDNode pointer else nullptr. +static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { + ConstantSDNode *Const = dyn_cast(N); + return Const != nullptr && !Const->isOpaque() ? Const : nullptr; +} + SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1596,16 +1644,16 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (add c1, c2) -> c1+c2 - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS if (isConstantIntBuildVectorOrConstantInt(N0) && !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); // fold (add x, 0) -> x - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N0; // fold (add Sym, c) -> Sym+c if (GlobalAddressSDNode *GA = dyn_cast(N0)) @@ -1616,21 +1664,21 @@ SDValue DAGCombiner::visitADD(SDNode *N) { (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A if (N1C && N0.getOpcode() == ISD::SUB) - if (ConstantSDNode *N0C = dyn_cast(N0.getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, + if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(N1C->getAPIntValue()+ - N0C->getAPIntValue(), VT), + N0C->getAPIntValue(), DL, VT), N0.getOperand(1)); + } // reassociate add if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) return RADD; // fold ((0-A) + B) -> B-A - if (N0.getOpcode() == ISD::SUB && isa(N0.getOperand(0)) && - cast(N0.getOperand(0))->isNullValue()) + if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0))) return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); // fold (A + (0-B)) -> A-B - if (N1.getOpcode() == ISD::SUB && isa(N1.getOperand(0)) && - cast(N1.getOperand(0))->isNullValue()) + if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0))) return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); // fold (A+(B-A)) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) @@ -1690,34 +1738,27 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) - if (N1.getOpcode() == ISD::SHL && - N1.getOperand(0).getOpcode() == ISD::SUB) - if (ConstantSDNode *C = - dyn_cast(N1.getOperand(0).getOperand(0))) - if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, - DAG.getNode(ISD::SHL, SDLoc(N), VT, - N1.getOperand(0).getOperand(1), - N1.getOperand(1))); - if (N0.getOpcode() == ISD::SHL && - N0.getOperand(0).getOpcode() == ISD::SUB) - if (ConstantSDNode *C = - dyn_cast(N0.getOperand(0).getOperand(0))) - if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, - DAG.getNode(ISD::SHL, SDLoc(N), VT, - N0.getOperand(0).getOperand(1), - N0.getOperand(1))); + if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && + isNullConstant(N1.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, + DAG.getNode(ISD::SHL, SDLoc(N), VT, + N1.getOperand(0).getOperand(1), + N1.getOperand(1))); + if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB && + isNullConstant(N0.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, + DAG.getNode(ISD::SHL, SDLoc(N), VT, + N0.getOperand(0).getOperand(1), + N0.getOperand(1))); if (N1.getOpcode() == ISD::AND) { SDValue AndOp0 = N1.getOperand(0); - ConstantSDNode *AndOp1 = dyn_cast(N1->getOperand(1)); unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); unsigned DestBits = VT.getScalarType().getSizeInBits(); // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) // and similar xforms where the inner op is either ~0 or 0. - if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { + if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) { SDLoc DL(N); return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); } @@ -1738,7 +1779,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (TN->getVT() == MVT::i1) { SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), - DAG.getConstant(1, VT)); + DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); } } @@ -1764,7 +1805,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); // fold (addc x, 0) -> x + no carry out - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); @@ -1812,9 +1853,9 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations, bool LegalTypes) { if (!VT.isVector()) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); return SDValue(); } @@ -1838,16 +1879,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // fold (sub c1, c2) -> c1-c2 - ConstantSDNode *N0C = dyn_cast(N0.getNode()); - ConstantSDNode *N1C = dyn_cast(N1.getNode()); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) - if (N1C) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, - DAG.getConstant(-N1C->getAPIntValue(), VT)); + if (N1C) { + SDLoc DL(N); + return DAG.getNode(ISD::ADD, DL, VT, N0, + DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); + } // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) - if (N0C && N0C->isAllOnesValue()) + if (isAllOnesConstant(N0)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold A-(A-B) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) @@ -1862,9 +1905,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : dyn_cast(N1.getOperand(1).getNode()); if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { + SDLoc DL(N); SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), - VT); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC, + DL, VT); + return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); } // fold ((A+(B+or-C))-B) -> A+or-C @@ -1905,7 +1949,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (GlobalAddressSDNode *GB = dyn_cast(N1)) if (GA->getGlobal() == GB->getGlobal()) return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), - VT); + SDLoc(N), VT); } // sub X, (sextinreg Y i1) -> add X, (and Y 1) @@ -1914,7 +1958,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (TN->getVT() == MVT::i1) { SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), - DAG.getConstant(1, VT)); + DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); } } @@ -1934,20 +1978,20 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { MVT::Glue)); // fold (subc x, x) -> 0 + no borrow - if (N0 == N1) - return CombineTo(N, DAG.getConstant(0, VT), - DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), + if (N0 == N1) { + SDLoc DL(N); + return CombineTo(N, DAG.getConstant(0, DL, VT), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); + } // fold (subc x, 0) -> x + no borrow - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow - if (N0C && N0C->isAllOnesValue()) + if (isAllOnesConstant(N0)) return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0), DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); @@ -1974,10 +2018,12 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); bool N0IsConst = false; bool N1IsConst = false; + bool N1IsOpaqueConst = false; + bool N0IsOpaqueConst = false; APInt ConstValue0, ConstValue1; // fold vector ops if (VT.isVector()) { @@ -1987,17 +2033,22 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); } else { - N0IsConst = dyn_cast(N0) != nullptr; - ConstValue0 = N0IsConst ? (dyn_cast(N0))->getAPIntValue() - : APInt(); - N1IsConst = dyn_cast(N1) != nullptr; - ConstValue1 = N1IsConst ? (dyn_cast(N1))->getAPIntValue() - : APInt(); + N0IsConst = isa(N0); + if (N0IsConst) { + ConstValue0 = cast(N0)->getAPIntValue(); + N0IsOpaqueConst = cast(N0)->isOpaque(); + } + N1IsConst = isa(N1); + if (N1IsConst) { + ConstValue1 = cast(N1)->getAPIntValue(); + N1IsOpaqueConst = cast(N1)->isOpaque(); + } } // fold (mul c1, c2) -> c1*c2 - if (N0IsConst && N1IsConst) - return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode()); + if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst) + return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, + N0.getNode(), N1.getNode()); // canonicalize constant to RHS (vector doesn't have to splat) if (isConstantIntBuildVectorOrConstantInt(N0) && @@ -2014,23 +2065,30 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (N1IsConst && ConstValue1 == 1 && IsFullSplat) return N0; // fold (mul x, -1) -> 0-x - if (N1IsConst && ConstValue1.isAllOnesValue()) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), N0); + if (N1IsConst && ConstValue1.isAllOnesValue()) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), N0); + } // fold (mul x, (1 << c)) -> x << c - if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getConstant(ConstValue1.logBase2(), + if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() && + IsFullSplat) { + SDLoc DL(N); + return DAG.getNode(ISD::SHL, DL, VT, N0, + DAG.getConstant(ConstValue1.logBase2(), DL, getShiftAmountTy(N0.getValueType()))); + } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { + if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() && + IsFullSplat) { unsigned Log2Val = (-ConstValue1).logBase2(); + SDLoc DL(N); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), - DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getConstant(Log2Val, + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), + DAG.getNode(ISD::SHL, DL, VT, N0, + DAG.getConstant(Log2Val, DL, getShiftAmountTy(N0.getValueType())))); } @@ -2100,15 +2158,17 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // fold (sdiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); + if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C); // fold (sdiv X, 1) -> X - if (N1C && N1C->getAPIntValue() == 1LL) + if (N1C && N1C->isOne()) return N0; // fold (sdiv X, -1) -> 0-X - if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), N0); + if (N1C && N1C->isAllOnesValue()) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), N0); + } // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 if (!VT.isVector()) { @@ -2118,8 +2178,13 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { } // fold (sdiv X, pow2) -> simple ops after legalize - if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() || - (-N1C->getAPIntValue()).isPowerOf2())) { + // FIXME: We check for the exact bit here because the generic lowering gives + // better results in that case. The target-specific lowering should learn how + // to handle exact sdivs efficiently. + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + !cast(N)->Flags.hasExact() && + (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. if (TLI.isPow2SDivCheap()) @@ -2131,24 +2196,26 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return Res; unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); + SDLoc DL(N); // Splat the sign bit into the register SDValue SGN = - DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, - DAG.getConstant(VT.getScalarSizeInBits() - 1, + DAG.getNode(ISD::SRA, DL, VT, N0, + DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); AddToWorklist(SGN.getNode()); // Add (N0 < 0) ? abs2 - 1 : 0; SDValue SRL = - DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, - DAG.getConstant(VT.getScalarSizeInBits() - lg2, + DAG.getNode(ISD::SRL, DL, VT, SGN, + DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL, getShiftAmountTy(SGN.getValueType()))); - SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); + SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL); AddToWorklist(SRL.getNode()); AddToWorklist(ADD.getNode()); // Divide by pow2 - SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD, - DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); + SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD, + DAG.getConstant(lg2, DL, + getShiftAmountTy(ADD.getValueType()))); // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. @@ -2156,7 +2223,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return SRA; AddToWorklist(SRA.getNode()); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); } // If integer divide is expensive and we satisfy the requirements, emit an @@ -2168,7 +2235,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2189,25 +2256,30 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { // fold (udiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT, + N0C, N1C)) + return Folded; // fold (udiv x, (1 << c)) -> x >>u c - if (N1C && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, - DAG.getConstant(N1C->getAPIntValue().logBase2(), + if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) { + SDLoc DL(N); + return DAG.getNode(ISD::SRL, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, getShiftAmountTy(N0.getValueType()))); + } // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = dyn_cast(N1.getOperand(0))) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { EVT ADDVT = N1.getOperand(1).getValueType(); - SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT, + SDLoc DL(N); + SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() .logBase2(), - ADDVT)); + DL, ADDVT)); AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); + return DAG.getNode(ISD::SRL, DL, VT, N0, Add); } } } @@ -2219,7 +2291,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2235,8 +2307,10 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { // fold (srem c1, c2) -> c1%c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT, + N0C, N1C)) + return Folded; // If we know the sign bits of both operands are zero, strength reduce to a // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 if (!VT.isVector()) { @@ -2261,7 +2335,7 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { // undef % X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X % undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2277,22 +2351,28 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { // fold (urem c1, c2) -> c1%c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT, + N0C, N1C)) + return Folded; // fold (urem x, pow2) -> (and x, pow2-1) - if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, - DAG.getConstant(N1C->getAPIntValue()-1,VT)); + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + N1C->getAPIntValue().isPowerOf2()) { + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); + } // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = dyn_cast(N1.getOperand(0))) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { + SDLoc DL(N); SDValue Add = - DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), + DAG.getNode(ISD::ADD, DL, VT, N1, + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT)); AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); } } } @@ -2314,7 +2394,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { // undef % X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X % undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2325,21 +2405,23 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N->getValueType(0); SDLoc DL(N); // fold (mulhs x, 0) -> 0 - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N1; // fold (mulhs x, 1) -> (sra x, size(x)-1) - if (N1C && N1C->getAPIntValue() == 1) - return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0, + if (isOneConstant(N1)) { + SDLoc DL(N); + return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, DAG.getConstant(N0.getValueType().getSizeInBits() - 1, + DL, getShiftAmountTy(N0.getValueType()))); + } // fold (mulhs x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // If the type twice as wide is legal, transform the mulhs to a wider multiply // plus a shift. @@ -2352,7 +2434,8 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } @@ -2363,19 +2446,18 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N->getValueType(0); SDLoc DL(N); // fold (mulhu x, 0) -> 0 - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N1; // fold (mulhu x, 1) -> 0 - if (N1C && N1C->getAPIntValue() == 1) - return DAG.getConstant(0, N0.getValueType()); + if (isOneConstant(N1)) + return DAG.getConstant(0, DL, N0.getValueType()); // fold (mulhu x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. @@ -2388,7 +2470,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } @@ -2466,7 +2549,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); @@ -2496,7 +2580,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); @@ -2652,7 +2737,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // build vector of all zeros that might be illegal at this stage. if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { if (!LegalTypes) - ShOp = DAG.getConstant(0, VT); + ShOp = DAG.getConstant(0, SDLoc(N), VT); else ShOp = SDValue(); } @@ -2673,7 +2758,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { ShOp = N0->getOperand(0); if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { if (!LegalTypes) - ShOp = DAG.getConstant(0, VT); + ShOp = DAG.getConstant(0, SDLoc(N), VT); else ShOp = SDValue(); } @@ -2704,7 +2789,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, // fold (and x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(LocReference), VT); // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) SDValue LL, LR, RL, RR, CC0, CC1; if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ @@ -2714,39 +2799,42 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, if (LR == RR && isa(LR) && Op0 == Op1 && LL.getValueType().isInteger()) { // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) - if (cast(LR)->isNullValue() && Op1 == ISD::SETEQ) { + if (isNullConstant(LR) && Op1 == ISD::SETEQ) { SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), LR.getValueType(), LL, RL); AddToWorklist(ORNode.getNode()); return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); } - // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) - if (cast(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); - } - // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) - if (cast(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + if (isAllOnesConstant(LR)) { + // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) + if (Op1 == ISD::SETEQ) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) + if (Op1 == ISD::SETGT) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } } } // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) if (LL == RL && isa(LR) && isa(RR) && Op0 == Op1 && LL.getValueType().isInteger() && - Op0 == ISD::SETNE && ((cast(LR)->isNullValue() && - cast(RR)->isAllOnesValue()) || - (cast(LR)->isAllOnesValue() && - cast(RR)->isNullValue()))) { - SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), - LL, DAG.getConstant(1, LL.getValueType())); + Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || + (isAllOnesConstant(LR) && isNullConstant(RR)))) { + SDLoc DL(N0); + SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), + LL, DAG.getConstant(1, DL, + LL.getValueType())); AddToWorklist(ADDNode.getNode()); return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, - DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); + DAG.getConstant(2, DL, LL.getValueType()), + ISD::SETUGE); } // canonicalize equivalent to ll == rl if (LL == RR && LR == RL) { @@ -2781,9 +2869,10 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { ADDC |= Mask; if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + SDLoc DL(N0); SDValue NewAdd = - DAG.getNode(ISD::ADD, SDLoc(N0), VT, - N0.getOperand(0), DAG.getConstant(ADDC, VT)); + DAG.getNode(ISD::ADD, DL, VT, + N0.getOperand(0), DAG.getConstant(ADDC, DL, VT)); CombineTo(N0.getNode(), NewAdd); // Return N so it doesn't get rechecked! return SDValue(LocReference, 0); @@ -2813,13 +2902,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.getConstant( APInt::getNullValue( N0.getValueType().getScalarType().getSizeInBits()), - N0.getValueType()); + SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant( APInt::getNullValue( N1.getValueType().getScalarType().getSizeInBits()), - N1.getValueType()); + SDLoc(N), N1.getValueType()); // fold (and x, -1) -> x, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) @@ -2829,22 +2918,22 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } // fold (and c1, c2) -> c1&c2 - ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = dyn_cast(N1); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS if (isConstantIntBuildVectorOrConstantInt(N0) && !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x - if (N1C && N1C->isAllOnesValue()) + if (isAllOnesConstant(N1)) return N0; // if (and x, c) is known to be zero, return 0 unsigned BitWidth = VT.getScalarType().getSizeInBits(); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(BitWidth))) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // reassociate and if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) return RAND; @@ -3022,12 +3111,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // For big endian targets, we need to add an offset to the pointer // to load the correct bytes. For little endian systems, we merely // need to read fewer bytes from the same pointer. - if (TLI.isBigEndian()) { + if (DAG.getDataLayout().isBigEndian()) { unsigned LVTStoreBytes = LoadedVT.getStoreSize(); unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; - NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, - NewPtr, DAG.getConstant(PtrOff, PtrType)); + SDLoc DL(LN0); + NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, + NewPtr, DAG.getConstant(PtrOff, DL, PtrType)); Alignment = MinAlign(Alignment, PtrOff); } @@ -3211,9 +3301,12 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, } SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); - if (OpSizeInBits > 16) - Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res, - DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT))); + if (OpSizeInBits > 16) { + SDLoc DL(N); + Res = DAG.getNode(ISD::SRL, DL, VT, Res, + DAG.getConstant(OpSizeInBits - 16, DL, + getShiftAmountTy(VT))); + } return Res; } @@ -3351,19 +3444,20 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) return SDValue(); - SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, - SDValue(Parts[0],0)); + SDLoc DL(N); + SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, + SDValue(Parts[0], 0)); // Result of the bswap should be rotated by 16. If it's not legal, then // do (x << 16) | (x >> 16). - SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); + SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT)); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) - return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt); + return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt); if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) - return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt); - return DAG.getNode(ISD::OR, SDLoc(N), VT, - DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt), - DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt)); + return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); + return DAG.getNode(ISD::OR, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt), + DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt)); } /// This contains all DAGCombine rules which reduce two values combined by @@ -3374,7 +3468,8 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { if (!LegalOperations && (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; - return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), + SDLoc(LocReference), VT); } // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) SDValue LL, LR, RL, RR, CC0, CC1; @@ -3382,12 +3477,10 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { ISD::CondCode Op0 = cast(CC0)->get(); ISD::CondCode Op1 = cast(CC1)->get(); - if (LR == RR && isa(LR) && Op0 == Op1 && - LL.getValueType().isInteger()) { + if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) { // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) - if (cast(LR)->isNullValue() && - (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { + if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), LR.getValueType(), LL, RL); AddToWorklist(ORNode.getNode()); @@ -3395,8 +3488,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { } // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) - if (cast(LR)->isAllOnesValue() && - (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { + if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), LR.getValueType(), LL, RL); AddToWorklist(ANDNode.getNode()); @@ -3422,25 +3514,29 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { } // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. - if (N0.getOpcode() == ISD::AND && - N1.getOpcode() == ISD::AND && - N0.getOperand(1).getOpcode() == ISD::Constant && - N1.getOperand(1).getOpcode() == ISD::Constant && + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && // Don't increase # computations. (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { // We can only do this xform if we know that bits from X that are set in C2 // but not in C1 are already zero. Likewise for Y. - const APInt &LHSMask = - cast(N0.getOperand(1))->getAPIntValue(); - const APInt &RHSMask = - cast(N1.getOperand(1))->getAPIntValue(); - - if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && - DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { - SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1.getOperand(0)); - return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, X, - DAG.getConstant(LHSMask | RHSMask, VT)); + if (const ConstantSDNode *N0O1C = + getAsNonOpaqueConstant(N0.getOperand(1))) { + if (const ConstantSDNode *N1O1C = + getAsNonOpaqueConstant(N1.getOperand(1))) { + // We can only do this xform if we know that bits from X that are set in + // C2 but not in C1 are already zero. Likewise for Y. + const APInt &LHSMask = N0O1C->getAPIntValue(); + const APInt &RHSMask = N1O1C->getAPIntValue(); + + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(0), N1.getOperand(0)); + SDLoc DL(LocReference); + return DAG.getNode(ISD::AND, DL, VT, X, + DAG.getConstant(LHSMask | RHSMask, DL, VT)); + } + } } } @@ -3480,13 +3576,13 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return DAG.getConstant( APInt::getAllOnesValue( N0.getValueType().getScalarType().getSizeInBits()), - N0.getValueType()); + SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllOnes(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant( APInt::getAllOnesValue( N1.getValueType().getScalarType().getSizeInBits()), - N1.getValueType()); + SDLoc(N), N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) @@ -3546,19 +3642,19 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } // fold (or c1, c2) -> c1|c2 - ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = dyn_cast(N1); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS if (isConstantIntBuildVectorOrConstantInt(N0) && !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold (or x, 0) -> x - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N0; // fold (or x, -1) -> -1 - if (N1C && N1C->isAllOnesValue()) + if (isAllOnesConstant(N1)) return N1; // fold (or x, c) -> c iff (x & ~c) == 0 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) @@ -3584,7 +3680,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { isa(N0.getOperand(1))) { ConstantSDNode *C1 = cast(N0.getOperand(1)); if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { - if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)) + if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, + N1C, C1)) return DAG.getNode( ISD::AND, SDLoc(N), VT, DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); @@ -3824,7 +3921,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { Mask &= cast(RHSMask)->getAPIntValue() | LHSBits; } - Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT)); + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT)); } return Rot.getNode(); @@ -3882,23 +3979,23 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold (xor undef, undef) -> 0. This is a common idiom (misuse). if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (xor x, undef) -> undef if (N0.getOpcode() == ISD::UNDEF) return N0; if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (xor c1, c2) -> c1^c2 - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS if (isConstantIntBuildVectorOrConstantInt(N0) && !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold (xor x, 0) -> x - if (N1C && N1C->isNullValue()) + if (isNullConstant(N1)) return N0; // reassociate xor if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1)) @@ -3926,18 +4023,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) - if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND && + if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND && N0.getNode()->hasOneUse() && isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ SDValue V = N0.getOperand(0); - V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V, - DAG.getConstant(1, V.getValueType())); + SDLoc DL(N0); + V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V, + DAG.getConstant(1, DL, V.getValueType())); AddToWorklist(V.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc - if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 && + if (isOneConstant(N1) && VT == MVT::i1 && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { @@ -3949,7 +4047,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants - if (N1C && N1C->isAllOnesValue() && + if (isAllOnesConstant(N1) && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isa(RHS) || isa(LHS)) { @@ -3970,16 +4068,18 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) if (N1C && N0.getOpcode() == ISD::XOR) { - ConstantSDNode *N00C = dyn_cast(N0.getOperand(0)); - ConstantSDNode *N01C = dyn_cast(N0.getOperand(1)); - if (N00C) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1), + if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), DAG.getConstant(N1C->getAPIntValue() ^ - N00C->getAPIntValue(), VT)); - if (N01C) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0), + N00C->getAPIntValue(), DL, VT)); + } + if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), DAG.getConstant(N1C->getAPIntValue() ^ - N01C->getAPIntValue(), VT)); + N01C->getAPIntValue(), DL, VT)); + } } // fold (xor x, x) -> 0 if (N0 == N1) @@ -4003,13 +4103,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // consistent result. // - Pushing the zero left requires shifting one bits in from the right. // A rotate left of ~1 is a nice way of achieving the desired result. - if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) - if (auto *N1C = dyn_cast(N1.getNode())) - if (N0.getOpcode() == ISD::SHL) - if (auto *ShlLHS = dyn_cast(N0.getOperand(0))) - if (N1C->isAllOnesValue() && ShlLHS->isOne()) - return DAG.getNode(ISD::ROTL, SDLoc(N), VT, DAG.getConstant(~1, VT), - N0.getOperand(1)); + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL + && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), + N0.getOperand(1)); + } // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) { @@ -4028,10 +4127,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { /// Handle transforms common to the three shifts, when the shift amount is a /// constant. SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { - // We can't and shouldn't fold opaque constants. - if (Amt->isOpaque()) - return SDValue(); - SDNode *LHS = N->getOperand(0).getNode(); if (!LHS->hasOneUse()) return SDValue(); @@ -4058,8 +4153,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { } // We require the RHS of the binop to be a constant and not opaque as well. - ConstantSDNode *BinOpCst = dyn_cast(LHS->getOperand(1)); - if (!BinOpCst || BinOpCst->isOpaque()) return SDValue(); + ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); + if (!BinOpCst) return SDValue(); // FIXME: disable this unless the input to the binop is a shift by a constant. // If it is not a shift, it pessimizes some common cases like: @@ -4112,14 +4207,17 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { SDValue N01 = N->getOperand(0).getOperand(1); if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { - EVT TruncVT = N->getValueType(0); - SDValue N00 = N->getOperand(0).getOperand(0); - APInt TruncC = N01C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); + if (!N01C->isOpaque()) { + EVT TruncVT = N->getValueType(0); + SDValue N00 = N->getOperand(0).getOperand(0); + APInt TruncC = N01C->getAPIntValue(); + TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); + SDLoc DL(N); - return DAG.getNode(ISD::AND, SDLoc(N), TruncVT, - DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00), - DAG.getConstant(TruncC, TruncVT)); + return DAG.getNode(ISD::AND, DL, TruncVT, + DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00), + DAG.getConstant(TruncC, DL, TruncVT)); + } } } @@ -4162,7 +4260,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && TLI.getBooleanContents(N00.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) { - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV)) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, + N01CV, N1CV)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); } } else { @@ -4172,25 +4271,25 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } // fold (shl c1, c2) -> c1<(N0); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); // fold (shl 0, x) -> 0 - if (N0C && N0C->isNullValue()) + if (isNullConstant(N0)) return N0; // fold (shl x, c >= size(x)) -> undef - if (N1C && N1C->getZExtValue() >= OpSizeInBits) + if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) return DAG.getUNDEF(VT); // fold (shl x, 0) -> x if (N1C && N1C->isNullValue()) return N0; // fold (shl undef, x) -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { @@ -4207,10 +4306,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { uint64_t c1 = N0C1->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); + SDLoc DL(N); if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, DL, N1.getValueType())); } } @@ -4230,12 +4330,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT InnerShiftVT = N0Op0.getValueType(); uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); if (c2 >= OpSizeInBits - InnerShiftSize) { + SDLoc DL(N0); if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N0), VT, - DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, + DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)), - DAG.getConstant(c1 + c2, N1.getValueType())); + DAG.getConstant(c1 + c2, DL, N1.getValueType())); } } } @@ -4253,8 +4354,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (c1 == c2) { SDValue NewOp0 = N0.getOperand(0); EVT CountVT = NewOp0.getOperand(1).getValueType(); - SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), - NewOp0, DAG.getConstant(c2, CountVT)); + SDLoc DL(N); + SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(), + NewOp0, + DAG.getConstant(c2, DL, CountVT)); AddToWorklist(NewSHL.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); } @@ -4262,6 +4365,22 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } + // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 + // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2 + if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) && + cast(N0)->Flags.hasExact()) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t C1 = N0C1->getZExtValue(); + uint64_t C2 = N1C->getZExtValue(); + SDLoc DL(N); + if (C1 <= C2) + return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(C2 - C1, DL, N1.getValueType())); + return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), + DAG.getConstant(C1 - C2, DL, N1.getValueType())); + } + } + // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or // (and (srl x, (sub c1, c2), MASK) // Only fold this if the inner shift has no other uses -- if it does, folding @@ -4275,25 +4394,30 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue Shift; if (c2 > c1) { Mask = Mask.shl(c2 - c1); - Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c2 - c1, N1.getValueType())); + SDLoc DL(N); + Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(c2 - c1, DL, N1.getValueType())); } else { Mask = Mask.lshr(c1 - c2); - Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 - c2, N1.getValueType())); + SDLoc DL(N); + Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 - c2, DL, N1.getValueType())); } - return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, - DAG.getConstant(Mask, VT)); + SDLoc DL(N0); + return DAG.getNode(ISD::AND, DL, VT, Shift, + DAG.getConstant(Mask, DL, VT)); } } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { unsigned BitSize = VT.getScalarSizeInBits(); + SDLoc DL(N); SDValue HiBitsMask = DAG.getConstant(APInt::getHighBitsSet(BitSize, - BitSize - N1C->getZExtValue()), VT); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), + BitSize - N1C->getZExtValue()), + DL, VT); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask); } @@ -4309,7 +4433,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); } - if (N1C) { + if (N1C && !N1C->isOpaque()) { SDValue NewSHL = visitShiftByConstant(N, N1C); if (NewSHL.getNode()) return NewSHL; @@ -4334,14 +4458,14 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } // fold (sra c1, c2) -> (sra c1, c2) - ConstantSDNode *N0C = dyn_cast(N0); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); // fold (sra 0, x) -> 0 - if (N0C && N0C->isNullValue()) + if (isNullConstant(N0)) return N0; // fold (sra -1, x) -> -1 - if (N0C && N0C->isAllOnesValue()) + if (isAllOnesConstant(N0)) return N0; // fold (sra x, (setge c, size(x))) -> undef if (N1C && N1C->getZExtValue() >= OpSizeInBits) @@ -4369,8 +4493,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); if (Sum >= OpSizeInBits) Sum = OpSizeInBits - 1; - return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(Sum, N1.getValueType())); + SDLoc DL(N); + return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), + DAG.getConstant(Sum, DL, N1.getValueType())); } } @@ -4402,14 +4527,15 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && TLI.isTruncateFree(VT, TruncVT)) { - SDValue Amt = DAG.getConstant(ShiftAmt, - getShiftAmountTy(N0.getOperand(0).getValueType())); - SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT, - N0.getOperand(0), Amt); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT, - Shift); - return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), - N->getValueType(0), Trunc); + SDLoc DL(N); + SDValue Amt = DAG.getConstant(ShiftAmt, DL, + getShiftAmountTy(N0.getOperand(0).getValueType())); + SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, + N0.getOperand(0), Amt); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, + Shift); + return DAG.getNode(ISD::SIGN_EXTEND, DL, + N->getValueType(0), Trunc); } } } @@ -4436,12 +4562,13 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT LargeVT = N0Op0.getValueType(); if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) { + SDLoc DL(N); SDValue Amt = - DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), + DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL, getShiftAmountTy(N0Op0.getOperand(0).getValueType())); - SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, + SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); + return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); } } } @@ -4455,7 +4582,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); - if (N1C) { + if (N1C && !N1C->isOpaque()) { SDValue NewSRA = visitShiftByConstant(N, N1C); if (NewSRA.getNode()) return NewSRA; @@ -4480,11 +4607,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } // fold (srl c1, c2) -> c1 >>u c2 - ConstantSDNode *N0C = dyn_cast(N0); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); // fold (srl 0, x) -> 0 - if (N0C && N0C->isNullValue()) + if (isNullConstant(N0)) return N0; // fold (srl x, c >= size(x)) -> undef if (N1C && N1C->getZExtValue() >= OpSizeInBits) @@ -4495,17 +4622,18 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // if (srl x, c) is known to be zero, return 0 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRL) { if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) { uint64_t c1 = N01C->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); + SDLoc DL(N); if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, DL, N1.getValueType())); } } @@ -4521,12 +4649,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); // This is only valid if the OpSizeInBits + c1 = size of inner shift. if (c1 + OpSizeInBits == InnerShiftSize) { + SDLoc DL(N0); if (c1 + c2 >= InnerShiftSize) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, - DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT, + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(ISD::SRL, DL, InnerShiftVT, N0.getOperand(0)->getOperand(0), - DAG.getConstant(c1 + c2, ShiftCountVT))); + DAG.getConstant(c1 + c2, DL, + ShiftCountVT))); } } @@ -4535,8 +4665,9 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { unsigned BitSize = N0.getScalarValueSizeInBits(); if (BitSize <= 64) { uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(~0ULL >> ShAmt, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), + DAG.getConstant(~0ULL >> ShAmt, DL, VT)); } } @@ -4550,14 +4681,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { uint64_t ShiftAmt = N1C->getZExtValue(); - SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT, + SDLoc DL0(N0); + SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0), - DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); + DAG.getConstant(ShiftAmt, DL0, + getShiftAmountTy(SmallVT))); AddToWorklist(SmallShift.getNode()); APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), - DAG.getConstant(Mask, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift), + DAG.getConstant(Mask, DL, VT)); } } @@ -4576,12 +4710,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. - if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT); + if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); // If all of the bits input the to ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one. APInt UnknownBits = ~KnownZero; - if (UnknownBits == 0) return DAG.getConstant(1, VT); + if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. if ((UnknownBits & (UnknownBits - 1)) == 0) { @@ -4593,13 +4727,16 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue Op = N0.getOperand(0); if (ShAmt) { - Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op, - DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); + SDLoc DL(N0); + Op = DAG.getNode(ISD::SRL, DL, VT, Op, + DAG.getConstant(ShAmt, DL, + getShiftAmountTy(Op.getValueType()))); AddToWorklist(Op.getNode()); } - return DAG.getNode(ISD::XOR, SDLoc(N), VT, - Op, DAG.getConstant(1, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, + Op, DAG.getConstant(1, DL, VT)); } } @@ -4616,7 +4753,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - if (N1C) { + if (N1C && !N1C->isOpaque()) { SDValue NewSRL = visitShiftByConstant(N, N1C); if (NewSRL.getNode()) return NewSRL; @@ -4659,12 +4796,25 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitBSWAP(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (bswap c1) -> c2 + if (isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); + // fold (bswap (bswap x)) -> x + if (N0.getOpcode() == ISD::BSWAP) + return N0->getOperand(0); + return SDValue(); +} + SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (ctlz c1) -> c2 - if (isa(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4674,7 +4824,7 @@ SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctlz_zero_undef c1) -> c2 - if (isa(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4684,7 +4834,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { EVT VT = N->getValueType(0); // fold (cttz c1) -> c2 - if (isa(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4694,7 +4844,7 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { EVT VT = N->getValueType(0); // fold (cttz_zero_undef c1) -> c2 - if (isa(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4704,7 +4854,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctpop c1) -> c2 - if (isa(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); return SDValue(); } @@ -4756,16 +4906,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // fold (select C, X, X) -> X if (N1 == N2) return N1; - // fold (select true, X, Y) -> X - ConstantSDNode *N0C = dyn_cast(N0); - if (N0C && !N0C->isNullValue()) - return N1; - // fold (select false, X, Y) -> Y - if (N0C && N0C->isNullValue()) - return N2; + if (const ConstantSDNode *N0C = dyn_cast(N0)) { + // fold (select true, X, Y) -> X + // fold (select false, X, Y) -> Y + return !N0C->isNullValue() ? N1 : N2; + } // fold (select C, 1, X) -> (or C, X) - ConstantSDNode *N1C = dyn_cast(N1); - if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) + if (VT == MVT::i1 && isOneConstant(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) // We can't do this reliably if integer based booleans have different contents @@ -4776,47 +4923,49 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // undiscoverable (or not reasonably discoverable). For example, it could be // in another basic block or it could require searching a complicated // expression. - ConstantSDNode *N2C = dyn_cast(N2); if (VT.isInteger() && (VT0 == MVT::i1 || (VT0.isInteger() && TLI.getBooleanContents(false, false) == TLI.getBooleanContents(false, true) && TLI.getBooleanContents(false, false) == TargetLowering::ZeroOrOneBooleanContent)) && - N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { + isNullConstant(N1) && isOneConstant(N2)) { SDValue XORNode; - if (VT == VT0) - return DAG.getNode(ISD::XOR, SDLoc(N), VT0, - N0, DAG.getConstant(1, VT0)); - XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0, - N0, DAG.getConstant(1, VT0)); + if (VT == VT0) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT0, + N0, DAG.getConstant(1, DL, VT0)); + } + SDLoc DL0(N0); + XORNode = DAG.getNode(ISD::XOR, DL0, VT0, + N0, DAG.getConstant(1, DL0, VT0)); AddToWorklist(XORNode.getNode()); if (VT.bitsGT(VT0)) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); } // fold (select C, 0, X) -> (and (not C), X) - if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { + if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) - if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { + if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); } // fold (select C, X, 0) -> (and C, X) - if (VT == MVT::i1 && N2C && N2C->isNullValue()) + if (VT == MVT::i1 && isNullConstant(N2)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // fold (select X, X, Y) -> (or X, Y) // fold (select X, 1, Y) -> (or X, Y) - if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1))) + if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) - if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) + if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // If we can fold this based on the true/false value, do so. @@ -4884,7 +5033,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N1_0 = N1->getOperand(0); SDValue N1_1 = N1->getOperand(1); SDValue N1_2 = N1->getOperand(2); - if (N1_2 == N2) { + if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { // Create the actual and node if we can generate good code for it. if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), @@ -4903,7 +5052,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N2_0 = N2->getOperand(0); SDValue N2_1 = N2->getOperand(1); SDValue N2_2 = N2->getOperand(2); - if (N2_1 == N1) { + if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { // Create the actual or node if we can generate good code for it. if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), @@ -4994,6 +5143,67 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); } +SDValue DAGCombiner::visitMSCATTER(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedScatterSDNode *MSC = cast(N); + SDValue Mask = MSC->getMask(); + SDValue Data = MSC->getValue(); + SDLoc DL(N); + + // If the MSCATTER data type requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + if (Mask.getOpcode() != ISD::SETCC) + return SDValue(); + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != + TargetLowering::TypeSplitVector) + return SDValue(); + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0)); + + SDValue Chain = MSC->getChain(); + + EVT MemoryVT = MSC->getMemoryVT(); + unsigned Alignment = MSC->getOriginalAlignment(); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + SDValue BasePtr = MSC->getBasePtr(); + SDValue IndexLo, IndexHi; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MSC->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, MSC->getAAInfo(), MSC->getRanges()); + + SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo }; + Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), + DL, OpsLo, MMO); + + SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi}; + Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), + DL, OpsHi, MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} + SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (Level >= AfterLegalizeTypes) @@ -5049,7 +5259,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); MMO = DAG.getMachineFunction(). getMachineMemOperand(MST->getPointerInfo(), @@ -5068,6 +5278,83 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitMGATHER(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedGatherSDNode *MGT = dyn_cast(N); + SDValue Mask = MGT->getMask(); + SDLoc DL(N); + + // If the MGATHER result requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + + if (Mask.getOpcode() != ISD::SETCC) + return SDValue(); + + EVT VT = N->getValueType(0); + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), VT) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + SDValue Src0 = MGT->getValue(); + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); + + SDValue Chain = MGT->getChain(); + EVT MemoryVT = MGT->getMemoryVT(); + unsigned Alignment = MGT->getOriginalAlignment(); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue BasePtr = MGT->getBasePtr(); + SDValue Index = MGT->getIndex(); + SDValue IndexLo, IndexHi; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), MGT->getRanges()); + + SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo }; + Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, + MMO); + + SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi}; + Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, + MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); + + SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + + SDValue RetOps[] = { GatherRes, Chain }; + return DAG.getMergeValues(RetOps, DL); +} + SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (Level >= AfterLegalizeTypes) @@ -5124,7 +5411,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); MMO = DAG.getMachineFunction(). getMachineMemOperand(MLD->getPointerInfo(), @@ -5183,7 +5470,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { EVT VT = LHS.getValueType(); SDValue Shift = DAG.getNode( ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT)); + DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT)); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -5191,6 +5478,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { } } + if (SimplifySelectOps(N, N1, N2)) + return SDValue(N, 0); // Don't revisit N. + // If the VSELECT result requires splitting and the mask is provided by a // SETCC, then split both nodes and its operands before legalization. This // prevents the type legalizer from unrolling SETCC into scalar comparisons @@ -5289,12 +5579,12 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { SDLoc(N)); } -// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext -// dag node into a ConstantSDNode or a build_vector of constants. -// This function is called by the DAGCombiner when visiting sext/zext/aext -// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). -// Vector extends are not folded if operations are legal; this is to -// avoid introducing illegal build_vector dag nodes. +/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or +/// a build_vector of constants. +/// This function is called by the DAGCombiner when visiting sext/zext/aext +/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). +/// Vector extends are not folded if operations are legal; this is to +/// avoid introducing illegal build_vector dag nodes. static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes, bool LegalOperations) { @@ -5303,7 +5593,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, EVT VT = N->getValueType(0); assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || - Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); + Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) + && "Expected EXTEND dag node in input!"); // fold (sext c1) -> c1 // fold (zext c1) -> c1 @@ -5323,9 +5614,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, // We can fold this node into a build_vector. unsigned VTBits = SVT.getSizeInBits(); unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); - unsigned ShAmt = VTBits - EVTBits; SmallVector Elts; - unsigned NumElts = N0->getNumOperands(); + unsigned NumElts = VT.getVectorNumElements(); SDLoc DL(N); for (unsigned i=0; i != NumElts; ++i) { @@ -5335,14 +5625,14 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, continue; } - ConstantSDNode *CurrentND = cast(Op); - const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); - if (Opcode == ISD::SIGN_EXTEND) - Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), - SVT)); + SDLoc DL(Op); + // Get the constant value and if needed trunc it to the size of the type. + // Nodes like build_vector might have constants wider than the scalar type. + APInt C = cast(Op)->getAPIntValue().zextOrTrunc(EVTBits); + if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) + Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT)); else - Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(), - SVT)); + Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT)); } return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); @@ -5508,7 +5798,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { Align, LN0->getAAInfo()); BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Stride, BasePtr.getValueType())); + DAG.getConstant(Stride, DL, BasePtr.getValueType())); Loads.push_back(SplitLoad.getValue(0)); Chains.push_back(SplitLoad.getValue(1)); @@ -5671,14 +5961,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN0->getMemOperand()); APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.sext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, - ExtLoad, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, + ExtLoad, DAG.getConstant(Mask, DL, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -5721,11 +6012,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0) unsigned ElementWidth = VT.getScalarType().getSizeInBits(); + SDLoc DL(N); SDValue NegOne = - DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); + DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT); SDValue SCC = - SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), - NegOne, DAG.getConstant(0, VT), + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + NegOne, DAG.getConstant(0, DL, VT), cast(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; @@ -5737,7 +6029,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC); return DAG.getSelect(DL, VT, SetCC, - NegOne, DAG.getConstant(0, VT)); + NegOne, DAG.getConstant(0, DL, VT)); } } } @@ -5771,11 +6063,9 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, SDValue Op1 = N->getOperand(1); assert(Op0.getValueType() == Op1.getValueType()); - ConstantSDNode *COp0 = dyn_cast(Op0); - ConstantSDNode *COp1 = dyn_cast(Op1); - if (COp0 && COp0->isNullValue()) + if (isNullConstant(Op0)) Op = Op1; - else if (COp1 && COp1->isNullValue()) + else if (isNullConstant(Op1)) Op = Op0; else return false; @@ -5886,8 +6176,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - X, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + X, DAG.getConstant(Mask, DL, VT)); } // fold (zext (load x)) -> (zext (truncate (zextload x))) @@ -5947,14 +6238,15 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN0->getMemOperand()); APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, - ExtLoad, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, + ExtLoad, DAG.getConstant(Mask, DL, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -5992,19 +6284,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. // Only do this before legalize for now. EVT EltVT = VT.getVectorElementType(); + SDLoc DL(N); SmallVector OneOps(VT.getVectorNumElements(), - DAG.getConstant(1, EltVT)); + DAG.getConstant(1, DL, EltVT)); if (VT.getSizeInBits() == N0VT.getSizeInBits()) // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), + return DAG.getNode(ISD::AND, DL, VT, + DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()), - DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); // If the desired elements are smaller or larger than the source @@ -6017,18 +6310,19 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT), - DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps)); + return DAG.getNode(ISD::AND, DL, VT, + DAG.getSExtOrTrunc(VsetCC, DL, VT), + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDLoc DL(N); SDValue SCC = - SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, VT), DAG.getConstant(0, VT), + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), cast(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; } @@ -6120,8 +6414,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - X, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + X, DAG.getConstant(Mask, DL, VT)); } // fold (aext (load x)) -> (aext (truncate (extload x))) @@ -6204,9 +6499,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDLoc DL(N); SDValue SCC = - SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, VT), DAG.getConstant(0, VT), + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), cast(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; @@ -6227,7 +6523,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { const APInt &CVal = CV->getAPIntValue(); APInt NewVal = CVal & Mask; if (NewVal != CVal) - return DAG.getConstant(NewVal, V.getValueType()); + return DAG.getConstant(NewVal, SDLoc(V), V.getValueType()); break; } case ISD::OR: @@ -6242,7 +6538,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { // Only look at single-use SRLs. if (!V.getNode()->hasOneUse()) break; - if (ConstantSDNode *RHSC = dyn_cast(V.getOperand(1))) { + if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) { // See if we can recursively simplify the LHS. unsigned Amt = RHSC->getZExtValue(); @@ -6379,7 +6675,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. - if (TLI.isBigEndian()) { + if (DAG.getDataLayout().isBigEndian()) { unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; @@ -6387,9 +6683,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); - SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), + SDLoc DL(LN0); + SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, LN0->getBasePtr(), - DAG.getConstant(PtrOff, PtrType)); + DAG.getConstant(PtrOff, DL, PtrType)); AddToWorklist(NewPtr.getNode()); SDValue Load; @@ -6418,11 +6715,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // no larger than the source) then the useful bits of the result are // zero; we can't simply return the shortened shift, because the result // of that operation is undefined. + SDLoc DL(N0); if (ShLeftAmt >= VT.getSizeInBits()) - Result = DAG.getConstant(0, VT); + Result = DAG.getConstant(0, DL, VT); else - Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT, - Result, DAG.getConstant(ShLeftAmt, ShImmTy)); + Result = DAG.getNode(ISD::SHL, DL, VT, + Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy)); } // Return the new loaded value. @@ -6549,7 +6847,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ConstantSDNode *CurrentND = cast(Op); const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), - Op.getValueType())); + SDLoc(Op), Op.getValueType())); } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts); @@ -6558,10 +6856,24 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + + return SDValue(); +} + SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - bool isLE = TLI.isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -6614,15 +6926,16 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue EltNo = N0->getOperand(1); if (isa(EltNo) && isTypeLegal(NVT)) { int Elt = cast(EltNo)->getZExtValue(); - EVT IndexTy = TLI.getVectorIdxTy(); + EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, N0.getOperand(0)); + SDLoc DL(N); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - SDLoc(N), TrTy, V, - DAG.getConstant(Index, IndexTy)); + DL, TrTy, V, + DAG.getConstant(Index, DL, IndexTy)); } } @@ -6780,8 +7093,8 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { !LD2->isVolatile() && DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { unsigned Align = LD1->getAlignment(); - unsigned NewAlign = TLI.getDataLayout()-> - getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( + VT.getTypeForEVT(*DAG.getContext())); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) @@ -6837,13 +7150,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // Do not change the width of a volatile load. !cast(N0)->isVolatile() && // Do not remove the cast if the types differ in endian layout. - TLI.hasBigEndianPartOrdering(N0.getValueType()) == - TLI.hasBigEndianPartOrdering(VT) && + TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) == + TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast(N0); - unsigned Align = TLI.getDataLayout()-> - getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + unsigned Align = DAG.getDataLayout().getABITypeAlignment( + VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); if (Align <= OrigAlign) { @@ -6868,13 +7181,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { N0.getOperand(0)); AddToWorklist(NewConv.getNode()); + SDLoc DL(N); APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, - NewConv, DAG.getConstant(SignBit, VT)); + return DAG.getNode(ISD::XOR, DL, VT, + NewConv, DAG.getConstant(SignBit, DL, VT)); assert(N0.getOpcode() == ISD::FABS); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - NewConv, DAG.getConstant(~SignBit, VT)); + return DAG.getNode(ISD::AND, DL, VT, + NewConv, DAG.getConstant(~SignBit, DL, VT)); } // fold (bitconvert (fcopysign cst, x)) -> @@ -6899,9 +7213,11 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { } else if (OrigXWidth > VTWidth) { // To get the sign bit in the right place, we have to shift it right // before truncating. - X = DAG.getNode(ISD::SRL, SDLoc(X), + SDLoc DL(X); + X = DAG.getNode(ISD::SRL, DL, X.getValueType(), X, - DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); + DAG.getConstant(OrigXWidth-VTWidth, DL, + X.getValueType())); AddToWorklist(X.getNode()); X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); AddToWorklist(X.getNode()); @@ -6909,13 +7225,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, - X, DAG.getConstant(SignBit, VT)); + X, DAG.getConstant(SignBit, SDLoc(X), VT)); AddToWorklist(X.getNode()); SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, - Cst, DAG.getConstant(~SignBit, VT)); + Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); AddToWorklist(Cst.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); @@ -6929,6 +7245,51 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { return CombineLD; } + // Remove double bitcasts from shuffles - this is often a legacy of + // XformToShuffleWithZero being used to combine bitmaskings (of + // float vectors bitcast to integer vectors) into shuffles. + // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1) + if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() && + N0->getOpcode() == ISD::VECTOR_SHUFFLE && + VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() && + !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) { + ShuffleVectorSDNode *SVN = cast(N0); + + // If operands are a bitcast, peek through if it casts the original VT. + // If operands are a UNDEF or constant, just bitcast back to original VT. + auto PeekThroughBitcast = [&](SDValue Op) { + if (Op.getOpcode() == ISD::BITCAST && + Op.getOperand(0)->getValueType(0) == VT) + return SDValue(Op.getOperand(0)); + if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); + return SDValue(); + }; + + SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); + SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); + if (!(SV0 && SV1)) + return SDValue(); + + int MaskScale = + VT.getVectorNumElements() / N0.getValueType().getVectorNumElements(); + SmallVector NewMask; + for (int M : SVN->getMask()) + for (int i = 0; i != MaskScale; ++i) + NewMask.push_back(M < 0 ? -1 : M * MaskScale + i); + + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); + if (!LegalMask) { + std::swap(SV0, SV1); + ShuffleVectorSDNode::commuteMask(NewMask); + LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); + } + + if (LegalMask) + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); + } + return SDValue(); } @@ -6963,8 +7324,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { DstEltVT, BV->getOperand(0))); SmallVector Ops; - for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { - SDValue Op = BV->getOperand(i); + for (SDValue Op : BV->op_values()) { // If the vector element type is not legal, the BUILD_VECTOR operands // are promoted and implicitly truncated. Make that explicit here. if (Op.getValueType() != SrcEltVT) @@ -6997,6 +7357,8 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); } + SDLoc DL(BV); + // Okay, we know the src/dst types are both integers of differing types. // Handling growing first. assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); @@ -7006,7 +7368,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { SmallVector Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; i += NumInputsPerOutput) { - bool isLE = TLI.isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); APInt NewBits = APInt(DstBitSize, 0); bool EltIsUndef = true; for (unsigned j = 0; j != NumInputsPerOutput; ++j) { @@ -7023,11 +7385,11 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (EltIsUndef) Ops.push_back(DAG.getUNDEF(DstEltVT)); else - Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); + Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } // Finally, this must be the case where we are shrinking elements: each input @@ -7037,27 +7399,27 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { NumOutputsPerInput*BV->getNumOperands()); SmallVector Ops; - for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { - if (BV->getOperand(i).getOpcode() == ISD::UNDEF) { + for (const SDValue &Op : BV->op_values()) { + if (Op.getOpcode() == ISD::UNDEF) { Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); continue; } - APInt OpVal = cast(BV->getOperand(i))-> + APInt OpVal = cast(Op)-> getAPIntValue().zextOrTrunc(SrcBitSize); for (unsigned j = 0; j != NumOutputsPerInput; ++j) { APInt ThisVal = OpVal.trunc(DstBitSize); - Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); + Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); OpVal = OpVal.lshr(DstBitSize); } // For big endian targets, swap the order of the pieces of each element. - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } /// Try to perform FMA combining on a given FADD node. @@ -7132,7 +7494,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if (UnsafeFPMath && Aggressive) { + if ((UnsafeFPMath || HasFMAD) && Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) if (N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL) { @@ -7155,7 +7517,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { N0)); } - if (LookThroughFPExt) { + if (UnsafeFPMath && LookThroughFPExt) { // fold (fadd (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y, (fma (fpext u), (fpext v), z)) auto FoldFAddFMAFPExtFMul = [&] ( @@ -7372,7 +7734,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if (UnsafeFPMath && Aggressive) { + if ((UnsafeFPMath || HasFMAD) && Aggressive) { // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) if (N0.getOpcode() == PreferredFusedOpcode && @@ -7398,10 +7760,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N1.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N20), + N21, N0)); } - if (LookThroughFPExt) { + if (UnsafeFPMath && LookThroughFPExt) { // fold (fsub (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) if (N0.getOpcode() == PreferredFusedOpcode) { @@ -7508,6 +7871,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); + SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops @@ -7517,22 +7881,22 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FADD, DL, VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0); + return DAG.getNode(ISD::FADD, DL, VT, N1, N0); // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) - return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, + return DAG.getNode(ISD::FSUB, DL, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) - return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1, + return DAG.getNode(ISD::FSUB, DL, VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); // If 'unsafe math' is enabled, fold lots of things. @@ -7542,23 +7906,22 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { bool AllowNewConst = (Level < AfterLegalizeDAG); // fold (fadd A, 0) -> A - if (N1CFP && N1CFP->getValueAPF().isZero()) + if (N1CFP && N1CFP->isZero()) return N0; // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && isa(N0.getOperand(1))) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, SDLoc(N), VT, - N0.getOperand(1), N1)); + return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), + DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1)); // If allowed, fold (fadd (fneg x), x) -> 0.0 if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) - return DAG.getConstantFP(0.0, VT); + return DAG.getConstantFP(0.0, DL, VT); // If allowed, fold (fadd x, (fneg x)) -> 0.0 if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) - return DAG.getConstantFP(0.0, VT); + return DAG.getConstantFP(0.0, DL, VT); // We can fold chains of FADD's of the same value into multiplications. // This transform is not safe in general because we are reducing the number @@ -7570,21 +7933,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP01, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), + DAG.getConstantFP(1.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP); } // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP01, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), + DAG.getConstantFP(2.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP); } } @@ -7594,20 +7954,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP11, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), + DAG.getConstantFP(1.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP); } // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP11, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), + DAG.getConstantFP(2.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP); } } @@ -7615,18 +7973,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP = dyn_cast(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP && N0.getOperand(0) == N0.getOperand(1) && - (N0.getOperand(0) == N1)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1, DAG.getConstantFP(3.0, VT)); + (N0.getOperand(0) == N1)) { + return DAG.getNode(ISD::FMUL, DL, VT, + N1, DAG.getConstantFP(3.0, DL, VT)); + } } if (N1.getOpcode() == ISD::FADD && AllowNewConst) { ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0, DAG.getConstantFP(3.0, VT)); + N1.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, DL, VT, + N0, DAG.getConstantFP(3.0, DL, VT)); + } } // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) @@ -7634,9 +7994,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), DAG.getConstantFP(4.0, VT)); + N0.getOperand(0) == N1.getOperand(0)) { + return DAG.getNode(ISD::FMUL, DL, VT, + N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT)); + } } } // enable-unsafe-fp-math @@ -7666,7 +8027,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FSUB, dl, VT, N0, N1); // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) @@ -7676,11 +8037,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // If 'unsafe math' is enabled, fold lots of things. if (Options.UnsafeFPMath) { // (fsub A, 0) -> A - if (N1CFP && N1CFP->getValueAPF().isZero()) + if (N1CFP && N1CFP->isZero()) return N0; // (fsub 0, B) -> -B - if (N0CFP && N0CFP->getValueAPF().isZero()) { + if (N0CFP && N0CFP->isZero()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) @@ -7689,7 +8050,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub x, x) -> 0.0 if (N0 == N1) - return DAG.getConstantFP(0.0f, VT); + return DAG.getConstantFP(0.0f, dl, VT); // (fsub x, (fadd x, y)) -> (fneg y) // (fsub x, (fadd y, x)) -> (fneg y) @@ -7721,6 +8082,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); EVT VT = N->getValueType(0); + SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops @@ -7732,12 +8094,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N0, N1); // canonicalize constant to RHS if (isConstantFPBuildVectorOrConstantFP(N0) && !isConstantFPBuildVectorOrConstantFP(N1)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0); + return DAG.getNode(ISD::FMUL, DL, VT, N1, N0); // fold (fmul A, 1.0) -> A if (N1CFP && N1CFP->isExactlyValue(1.0)) @@ -7745,7 +8107,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (Options.UnsafeFPMath) { // fold (fmul A, 0) -> 0 - if (N1CFP && N1CFP->getValueAPF().isZero()) + if (N1CFP && N1CFP->isZero()) return N1; // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) @@ -7758,7 +8120,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { auto *BV1 = dyn_cast(N1); auto *BV00 = dyn_cast(N00); auto *BV01 = dyn_cast(N01); - + // Check 1: Make sure that the first operand of the inner multiply is NOT // a constant. Otherwise, we may induce infinite looping. if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) { @@ -7766,9 +8128,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // the second operand of the outer multiply are constants. if ((N1CFP && isConstOrConstSplatFP(N01)) || (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { - SDLoc SL(N); - SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1); - return DAG.getNode(ISD::FMUL, SL, VT, N00, MulConsts); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts); } } } @@ -7778,21 +8139,20 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // during an early run of DAGCombiner can prevent folding with fmuls // inserted during lowering. if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) { - SDLoc SL(N); - const SDValue Two = DAG.getConstantFP(2.0, VT); - SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts); + const SDValue Two = DAG.getConstantFP(2.0, DL, VT); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts); } } // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0); + return DAG.getNode(ISD::FADD, DL, VT, N0, N0); // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); + return DAG.getNode(ISD::FNEG, DL, VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { @@ -7800,7 +8160,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + return DAG.getNode(ISD::FMUL, DL, VT, GetNegatedExpression(N0, DAG, LegalOperations), GetNegatedExpression(N1, DAG, LegalOperations)); } @@ -7879,14 +8239,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (Options.UnsafeFPMath && N1CFP && N0 == N2) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(1.0, VT))); + N1, DAG.getConstantFP(1.0, dl, VT))); // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (Options.UnsafeFPMath && N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(-1.0, VT))); + N1, DAG.getConstantFP(-1.0, dl, VT))); return SDValue(); @@ -7926,8 +8286,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT))) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, - DAG.getConstantFP(Recip, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getConstantFP(Recip, DL, VT)); } // If this FDIV is part of a reciprocal square root, it may be folded @@ -8006,24 +8366,22 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SmallVector Users; // Find all FDIV users of the same divisor. - for (SDNode::use_iterator UI = N1.getNode()->use_begin(), - UE = N1.getNode()->use_end(); - UI != UE; ++UI) { - SDNode *User = UI.getUse().getUser(); - if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1) - Users.push_back(User); + for (auto *U : N1->uses()) { + if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) + Users.push_back(U); } if (TLI.combineRepeatedFPDivisors(Users.size())) { - SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0 - SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); + SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1); // Dividend / Divisor -> Dividend * Reciprocal - for (auto I = Users.begin(), E = Users.end(); I != E; ++I) { - if ((*I)->getOperand(0) != FPOne) { - SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT, - (*I)->getOperand(0), Reciprocal); - DAG.ReplaceAllUsesWith(*I, NewNode.getNode()); + for (auto *U : Users) { + SDValue Dividend = U->getOperand(0); + if (Dividend != FPOne) { + SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, + Reciprocal); + DAG.ReplaceAllUsesWith(U, NewNode.getNode()); } } return SDValue(); @@ -8048,29 +8406,29 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { } SDValue DAGCombiner::visitFSQRT(SDNode *N) { - if (DAG.getTarget().Options.UnsafeFPMath && - !TLI.isFsqrtCheap()) { - // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) - if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { - EVT VT = RV.getValueType(); - RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV); - AddToWorklist(RV.getNode()); + if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap()) + return SDValue(); - // Unfortunately, RV is now NaN if the input was exactly 0. - // Select out this case and force the answer to 0. - SDValue Zero = DAG.getConstantFP(0.0, VT); - SDValue ZeroCmp = - DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT), - N->getOperand(0), Zero, ISD::SETEQ); - AddToWorklist(ZeroCmp.getNode()); - AddToWorklist(RV.getNode()); + // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) + SDValue RV = BuildRsqrtEstimate(N->getOperand(0)); + if (!RV) + return SDValue(); + + EVT VT = RV.getValueType(); + SDLoc DL(N); + RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); + AddToWorklist(RV.getNode()); - RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, - SDLoc(N), VT, ZeroCmp, Zero, RV); - return RV; - } - } - return SDValue(); + // Unfortunately, RV is now NaN if the input was exactly 0. + // Select out this case and force the answer to 0. + SDValue Zero = DAG.getConstantFP(0.0, DL, VT); + EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ); + AddToWorklist(ZeroCmp.getNode()); + AddToWorklist(RV.getNode()); + + return DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, + ZeroCmp, Zero, RV); } SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { @@ -8151,11 +8509,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), - DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), + DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } // fold (sint_to_fp (zext (setcc x, y, cc))) -> @@ -8164,11 +8523,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), - DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), + DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(0).getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } } @@ -8203,11 +8563,12 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), - DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), + DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } } @@ -8260,11 +8621,10 @@ static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast(N0); EVT VT = N->getValueType(0); // fold (fp_to_sint c1fp) -> c1 - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); return FoldIntToFPToInt(N, DAG); @@ -8272,11 +8632,10 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast(N0); EVT VT = N->getValueType(0); // fold (fp_to_uint c1fp) -> c1 - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); return FoldIntToFPToInt(N, DAG); @@ -8305,9 +8664,11 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // single-step fp_round we want to fold to. // In other words, double rounding isn't the same as rounding. // Also, this is a value preserving truncation iff both fp_round's are. - if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), - DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc)); + if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) { + SDLoc DL(N); + return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), + DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL)); + } } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) @@ -8330,8 +8691,9 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { // fold (fp_round_inreg c1fp) -> c1fp if (N0CFP && isTypeLegal(EVT)) { - SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); - return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round); + SDLoc DL(N); + SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); + return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); } return SDValue(); @@ -8350,6 +8712,11 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); + // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op) + if (N0.getOpcode() == ISD::FP16_TO_FP && + TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal) + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0)); + // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the // value of X. if (N0.getOpcode() == ISD::FP_ROUND @@ -8373,7 +8740,8 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::FP_ROUND, SDLoc(N0), - N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), + N0.getValueType(), ExtLoad, + DAG.getIntPtrConstant(1, SDLoc(N0))), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -8445,15 +8813,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // For a scalar, just generate 0x80... SignMask = APInt::getSignBit(IntVT.getSizeInBits()); } - Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int, - DAG.getConstant(SignMask, IntVT)); + SDLoc DL0(N0); + Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, + DAG.getConstant(SignMask, DL0, IntVT)); AddToWorklist(Int.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); } } // (fneg (fmul c, x)) -> (fmul -c, x) - if (N0.getOpcode() == ISD::FMUL) { + if (N0.getOpcode() == ISD::FMUL && + (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) { ConstantFPSDNode *CFP1 = dyn_cast(N0.getOperand(1)); if (CFP1) { APFloat CVal = CFP1->getValueAPF(); @@ -8479,7 +8849,7 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) { if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0)); + return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0)); } if (N0CFP) { @@ -8500,7 +8870,7 @@ SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0)); + return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0)); } if (N0CFP) { @@ -8547,8 +8917,9 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // For a scalar, just generate 0x7f... SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); } - Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, - DAG.getConstant(SignMask, IntVT)); + SDLoc DL(N0); + Int = DAG.getNode(ISD::AND, DL, IntVT, Int, + DAG.getConstant(SignMask, DL, IntVT)); AddToWorklist(Int.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); } @@ -8618,13 +8989,14 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (AndConst.isPowerOf2() && cast(Op1)->getAPIntValue()==AndConst.logBase2()) { + SDLoc DL(N); SDValue SetCC = - DAG.getSetCC(SDLoc(N), + DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()), - Op0, DAG.getConstant(0, Op0.getValueType()), + Op0, DAG.getConstant(0, DL, Op0.getValueType()), ISD::SETNE); - SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N), + SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, Chain, SetCC, N2); // Don't add the new BRCond into the worklist or else SimplifySelectCC // will convert it back to (X & C1) >> C2. @@ -8677,12 +9049,11 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { bool Equal = false; - if (ConstantSDNode *RHSCI = dyn_cast(Op0)) - if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() && - Op0.getOpcode() == ISD::XOR) { - TheXor = Op0.getNode(); - Equal = true; - } + if (isOneConstant(Op0) && Op0.hasOneUse() && + Op0.getOpcode() == ISD::XOR) { + TheXor = Op0.getNode(); + Equal = true; + } EVT SetCCVT = N1.getValueType(); if (LegalTypes) @@ -8737,14 +9108,18 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI) { EVT VT; + unsigned AS; + if (LoadSDNode *LD = dyn_cast(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; - VT = Use->getValueType(0); + VT = LD->getMemoryVT(); + AS = LD->getAddressSpace(); } else if (StoreSDNode *ST = dyn_cast(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; - VT = ST->getValue().getValueType(); + VT = ST->getMemoryVT(); + AS = ST->getAddressSpace(); } else return false; @@ -8768,7 +9143,8 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, } else return false; - return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); + return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, + VT.getTypeForEVT(*DAG.getContext()), AS); } /// Try turning a load/store into a pre-indexed load/store when the base @@ -8827,8 +9203,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { } // Don't create a indexed load / store with zero offset. - if (isa(Offset) && - cast(Offset)->isNullValue()) + if (isNullConstant(Offset)) return false; // Try turning it into a pre-indexed load / store except when: @@ -8856,24 +9231,25 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // a copy of the original base pointer. SmallVector OtherUses; if (isa(Offset)) - for (SDNode *Use : BasePtr.getNode()->uses()) { - if (Use == Ptr.getNode()) + for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(), + UE = BasePtr.getNode()->use_end(); + UI != UE; ++UI) { + SDUse &Use = UI.getUse(); + // Skip the use that is Ptr and uses of other results from BasePtr's + // node (important for nodes that return multiple results). + if (Use.getUser() == Ptr.getNode() || Use != BasePtr) continue; - if (Use->isPredecessorOf(N)) + if (Use.getUser()->isPredecessorOf(N)) continue; - if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) { + if (Use.getUser()->getOpcode() != ISD::ADD && + Use.getUser()->getOpcode() != ISD::SUB) { OtherUses.clear(); break; } - SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1); - if (Op1.getNode() == BasePtr.getNode()) - std::swap(Op0, Op1); - assert(Op0.getNode() == BasePtr.getNode() && - "Use of ADD/SUB but not an operand"); - + SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1); if (!isa(Op1)) { OtherUses.clear(); break; @@ -8885,7 +9261,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { break; } - OtherUses.push_back(Use); + OtherUses.push_back(Use.getUser()); } if (Swapped) @@ -8978,12 +9354,14 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1; else CNV = CNV - Offset1; + SDLoc DL(OtherUses[i]); + // We can now generate the new expression. - SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0)); + SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0)); SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); SDValue NewUse = DAG.getNode(Opcode, - SDLoc(OtherUses[i]), + DL, OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); deleteAndRecombine(OtherUses[i]); @@ -9041,8 +9419,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { ISD::MemIndexedMode AM = ISD::UNINDEXED; if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { // Don't create a indexed load / store with zero offset. - if (isa(Offset) && - cast(Offset)->isNullValue()) + if (isNullConstant(Offset)) continue; // Try turning it into a post-indexed load / store except when @@ -9132,7 +9509,7 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { "Cannot split out indexing using opaque target constants"); if (Inc.getOpcode() == ISD::TargetConstant) { ConstantSDNode *ConstInc = cast(Inc); - Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), + Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc), ConstInc->getValueType(0)); } @@ -9492,8 +9869,7 @@ struct LoadedSlice { /// \pre DAG != nullptr. uint64_t getOffsetFromBase() const { assert(DAG && "Missing context."); - bool IsBigEndian = - DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian(); + bool IsBigEndian = DAG->getDataLayout().isBigEndian(); assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported."); uint64_t Offset = Shift / 8; unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; @@ -9525,8 +9901,9 @@ struct LoadedSlice { if (Offset) { // BaseAddr = BaseAddr + Offset. EVT ArithType = BaseAddr.getValueType(); - BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr, - DAG->getConstant(Offset, ArithType)); + SDLoc DL(Origin); + BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr, + DAG->getConstant(Offset, DL, ArithType)); } // Create the type of the loaded slice according to its size. @@ -9575,7 +9952,7 @@ struct LoadedSlice { // Check if it will be merged with the load. // 1. Check the alignment constraint. - unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment( + unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment( ResVT.getTypeForEVT(*DAG->getContext())); if (RequiredAlignment > getAlignment()) @@ -9856,8 +10233,8 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { return Result; // Fail. else { bool isOk = false; - for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) - if (Chain->getOperand(i).getNode() == LD) { + for (const SDValue &ChainOp : Chain->op_values()) + if (ChainOp.getNode() == LD) { isOk = true; break; } @@ -9932,24 +10309,27 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. - if (ByteShift) - IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal, - DAG.getConstant(ByteShift*8, + if (ByteShift) { + SDLoc DL(IVal); + IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal, + DAG.getConstant(ByteShift*8, DL, DC->getShiftAmountTy(IVal.getValueType()))); + } // Figure out the offset for the store and the alignment of the access. unsigned StOffset; unsigned NewAlign = St->getAlignment(); - if (DAG.getTargetLoweringInfo().isLittleEndian()) + if (DAG.getDataLayout().isLittleEndian()) StOffset = ByteShift; else StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; SDValue Ptr = St->getBasePtr(); if (StOffset) { - Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(), - Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); + SDLoc DL(IVal); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), + Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType())); NewAlign = MinAlign(NewAlign, StOffset); } @@ -10053,17 +10433,18 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { uint64_t PtrOff = ShAmt / 8; // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); - if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy)) + if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) return SDValue(); SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), Ptr.getValueType(), Ptr, - DAG.getConstant(PtrOff, Ptr.getValueType())); + DAG.getConstant(PtrOff, SDLoc(LD), + Ptr.getValueType())); SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), @@ -10071,7 +10452,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { LD->isInvariant(), NewAlign, LD->getAAInfo()); SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, - DAG.getConstant(NewImm, NewVT)); + DAG.getConstant(NewImm, SDLoc(Value), + NewVT)); SDValue NewST = DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, ST->getPointerInfo().getWithOffset(PtrOff), @@ -10120,7 +10502,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { unsigned LDAlign = LD->getAlignment(); unsigned STAlign = ST->getAlignment(); Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy); + unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy); if (LDAlign < ABIAlign || STAlign < ABIAlign) return SDValue(); @@ -10235,6 +10617,18 @@ struct BaseIndexOffset { }; } // namespace +SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, + SDLoc SL, + ArrayRef Stores, + EVT Ty) const { + SmallVector BuildVector; + + for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) + BuildVector.push_back(cast(Stores[I].MemNode)->getValue()); + + return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector); +} + bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SmallVectorImpl &StoreNodes, EVT MemVT, unsigned NumElem, bool IsConstantSrc, bool UseVector) { @@ -10265,12 +10659,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); if (IsConstantSrc) { - // A vector store with a constant source implies that the constant is - // zero; we only handle merging stores of constant zeros because the zero - // can be materialized without a load. - // It may be beneficial to loosen this restriction to allow non-zero - // store merging. - StoredVal = DAG.getConstant(0, Ty); + StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty); } else { SmallVector Ops; for (unsigned i = 0; i < NumElem ; ++i) { @@ -10290,29 +10679,29 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // elements, so this path implies a store of constants. assert(IsConstantSrc && "Merged vector elements should use vector store"); - unsigned StoreBW = NumElem * ElementSizeBytes * 8; - APInt StoreInt(StoreBW, 0); + unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + APInt StoreInt(SizeInBits, 0); // Construct a single integer constant which is made of the smaller // constant inputs. - bool IsLE = TLI.isLittleEndian(); + bool IsLE = DAG.getDataLayout().isLittleEndian(); for (unsigned i = 0; i < NumElem ; ++i) { unsigned Idx = IsLE ? (NumElem - 1 - i) : i; StoreSDNode *St = cast(StoreNodes[Idx].MemNode); SDValue Val = St->getValue(); - StoreInt <<= ElementSizeBytes*8; + StoreInt <<= ElementSizeBytes * 8; if (ConstantSDNode *C = dyn_cast(Val)) { - StoreInt |= C->getAPIntValue().zext(StoreBW); + StoreInt |= C->getAPIntValue().zext(SizeInBits); } else if (ConstantFPSDNode *C = dyn_cast(Val)) { - StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW); + StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits); } else { llvm_unreachable("Invalid constant element type"); } } // Create the new Load and Store operations. - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); - StoredVal = DAG.getConstant(StoreInt, StoreTy); + EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, @@ -10346,58 +10735,36 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( return true; } -bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { - if (OptLevel == CodeGenOpt::None) - return false; - - EVT MemVT = St->getMemoryVT(); - int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; - bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( - Attribute::NoImplicitFloat); - - // Don't merge vectors into wider inputs. - if (MemVT.isVector() || !MemVT.isSimple()) - return false; - - // Perform an early exit check. Do not bother looking at stored values that - // are not constants, loads, or extracted vector elements. - SDValue StoredVal = St->getValue(); - bool IsLoadSrc = isa(StoredVal); - bool IsConstantSrc = isa(StoredVal) || - isa(StoredVal); - bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); - - if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) - return false; +static bool allowableAlignment(const SelectionDAG &DAG, + const TargetLowering &TLI, EVT EVTTy, + unsigned AS, unsigned Align) { + if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align)) + return true; - // Only look at ends of store sequences. - SDValue Chain = SDValue(St, 0); - if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) - return false; + Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty); + return (Align >= ABIAlignment); +} +void DAGCombiner::getStoreMergeAndAliasCandidates( + StoreSDNode* St, SmallVectorImpl &StoreNodes, + SmallVectorImpl &AliasLoadNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); // We must have a base and an offset. if (!BasePtr.Base.getNode()) - return false; + return; // Do not handle stores to undef base pointers. if (BasePtr.Base.getOpcode() == ISD::UNDEF) - return false; - - // Save the LoadSDNodes that we find in the chain. - // We need to make sure that these nodes do not interfere with - // any of the store nodes. - SmallVector AliasLoadNodes; - - // Save the StoreSDNodes that we find in the chain. - SmallVector StoreNodes; + return; // Walk up the chain and look for nodes with offsets from the same // base pointer. Stop when reaching an instruction with a different kind // or instruction which has a different base pointer. + EVT MemVT = St->getMemoryVT(); unsigned Seq = 0; StoreSDNode *Index = St; while (Index) { @@ -10412,10 +10779,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (!Ptr.equalBaseIndex(BasePtr)) break; - // Check that the alignment is the same. - if (Index->getAlignment() != St->getAlignment()) - break; - // The memory operands must not be volatile. if (Index->isVolatile() || Index->isIndexed()) break; @@ -10429,11 +10792,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Index->getMemoryVT() != MemVT) break; - // We do not allow unaligned stores because we want to prevent overriding - // stores. - if (Index->getAlignment()*8 != MemVT.getSizeInBits()) - break; - // We found a potential memory operand to merge. StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); @@ -10463,7 +10821,51 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } } } +} + +bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { + if (OptLevel == CodeGenOpt::None) + return false; + + EVT MemVT = St->getMemoryVT(); + int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; + bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( + Attribute::NoImplicitFloat); + + // This function cannot currently deal with non-byte-sized memory sizes. + if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) + return false; + + // Don't merge vectors into wider inputs. + if (MemVT.isVector() || !MemVT.isSimple()) + return false; + // Perform an early exit check. Do not bother looking at stored values that + // are not constants, loads, or extracted vector elements. + SDValue StoredVal = St->getValue(); + bool IsLoadSrc = isa(StoredVal); + bool IsConstantSrc = isa(StoredVal) || + isa(StoredVal); + bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); + + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) + return false; + + // Only look at ends of store sequences. + SDValue Chain = SDValue(St, 0); + if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) + return false; + + // Save the LoadSDNodes that we find in the chain. + // We need to make sure that these nodes do not interfere with + // any of the store nodes. + SmallVector AliasLoadNodes; + + // Save the StoreSDNodes that we find in the chain. + SmallVector StoreNodes; + + getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); + // Check if there is anything to merge. if (StoreNodes.size() < 2) return false; @@ -10507,6 +10909,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // The node with the lowest store address. LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); // Store the constants into memory as one consecutive store. if (IsConstantSrc) { @@ -10527,29 +10931,43 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } // Find a legal type for the constant store. - unsigned StoreBW = (i+1) * ElementSizeBytes * 8; - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); - if (TLI.isTypeLegal(StoreTy)) + unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + if (TLI.isTypeLegal(StoreTy) && + allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, + FirstStoreAlign)) { LastLegalType = i+1; // Or check whether a truncstore is legal. - else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == - TargetLowering::TypePromoteInteger) { + } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); - if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy)) - LastLegalType = i+1; + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, + FirstStoreAlign)) { + LastLegalType = i + 1; + } } // Find a legal type for the vector store. EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); - if (TLI.isTypeLegal(Ty)) + if (TLI.isTypeLegal(Ty) && + allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) { LastLegalVectorType = i + 1; + } } - // We only use vectors if the constant is known to be zero and the - // function is not marked with the noimplicitfloat attribute. - if (NonZero || NoVectors) + + // We only use vectors if the constant is known to be zero or the target + // allows it and the function is not marked with the noimplicitfloat + // attribute. + if (NoVectors) { + LastLegalVectorType = 0; + } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT, + LastLegalVectorType, + FirstStoreAS)) { LastLegalVectorType = 0; + } // Check if we found a legal integer type to store. if (LastLegalType == 0 && LastLegalVectorType == 0) @@ -10579,7 +10997,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find a legal type for the vector store. EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); - if (TLI.isTypeLegal(Ty)) + if (TLI.isTypeLegal(Ty) && + allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) NumElem = i + 1; } @@ -10606,10 +11025,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (!Ld->hasNUsesOfValue(1, 0)) break; - // Check that the alignment is the same as the stores. - if (Ld->getAlignment() != St->getAlignment()) - break; - // The memory operands must not be volatile. if (Ld->isVolatile() || Ld->isIndexed()) break; @@ -10647,6 +11062,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { St->getAlignment() >= RequiredAlignment) return false; + LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode); + unsigned FirstLoadAS = FirstLoad->getAddressSpace(); + unsigned FirstLoadAlign = FirstLoad->getAlignment(); + // Scan the memory operations on the chain and find the first non-consecutive // load memory address. These variables hold the index in the store node // array. @@ -10655,7 +11074,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { unsigned LastLegalVectorType = 0; unsigned LastLegalIntegerType = 0; StartAddress = LoadNodes[0].OffsetFromBase; - SDValue FirstChain = LoadNodes[0].MemNode->getChain(); + SDValue FirstChain = FirstLoad->getChain(); for (unsigned i = 1; i < LoadNodes.size(); ++i) { // All loads much share the same chain. if (LoadNodes[i].MemNode->getChain() != FirstChain) @@ -10668,13 +11087,18 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find a legal type for the vector store. EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); - if (TLI.isTypeLegal(StoreTy)) + if (TLI.isTypeLegal(StoreTy) && + allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && + allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) { LastLegalVectorType = i + 1; + } // Find a legal type for the integer store. - unsigned StoreBW = (i+1) * ElementSizeBytes * 8; - StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); - if (TLI.isTypeLegal(StoreTy)) + unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; + StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + if (TLI.isTypeLegal(StoreTy) && + allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && + allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) LastLegalIntegerType = i + 1; // Or check whether a truncstore and extload is legal. else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == @@ -10684,7 +11108,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy)) + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && + allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, + FirstStoreAlign) && + allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS, + FirstLoadAlign)) LastLegalIntegerType = i+1; } } @@ -10721,25 +11149,20 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (UseVectorTy) { JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); } else { - unsigned StoreBW = NumElem * ElementSizeBytes * 8; - JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); } SDLoc LoadDL(LoadNodes[0].MemNode); SDLoc StoreDL(StoreNodes[0].MemNode); - LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode); - SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, - FirstLoad->getChain(), - FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), - false, false, false, - FirstLoad->getAlignment()); - - SDValue NewStore = DAG.getStore(LatestOp->getChain(), StoreDL, NewLoad, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), false, false, - FirstInChain->getAlignment()); + SDValue NewLoad = DAG.getLoad( + JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign); + + SDValue NewStore = DAG.getStore( + LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), false, false, FirstStoreAlign); // Replace one of the loads with the new load. LoadSDNode *Ld = cast(LoadNodes[0].MemNode); @@ -10781,8 +11204,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->isUnindexed()) { unsigned OrigAlign = ST->getAlignment(); EVT SVT = Value.getOperand(0).getValueType(); - unsigned Align = TLI.getDataLayout()-> - getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); + unsigned Align = DAG.getDataLayout().getABITypeAlignment( + SVT.getTypeForEVT(*DAG.getContext())); if (Align <= OrigAlign && ((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) @@ -10814,8 +11237,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { case MVT::f32: if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + ; Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). - bitcastToAPInt().getZExtValue(), MVT::i32); + bitcastToAPInt().getZExtValue(), SDLoc(CFP), + MVT::i32); return DAG.getStore(Chain, SDLoc(N), Tmp, Ptr, ST->getMemOperand()); } @@ -10824,8 +11249,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { + ; Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). - getZExtValue(), MVT::i64); + getZExtValue(), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, SDLoc(N), Tmp, Ptr, ST->getMemOperand()); } @@ -10836,27 +11262,30 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // argument passing. Since this is so common, custom legalize the // 64-bit integer store into two 32-bit stores. uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); - SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); - if (TLI.isBigEndian()) std::swap(Lo, Hi); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); AAMDNodes AAInfo = ST->getAAInfo(); + SDLoc DL(N); + SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, ST->getAlignment(), AAInfo); - Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr, - DAG.getConstant(4, Ptr.getValueType())); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4, DL, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, Alignment, AAInfo); - return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, St0, St1); } @@ -11085,7 +11514,7 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); unsigned Align = OriginalLoad->getAlignment(); - unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment( + unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( VecEltVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) @@ -11097,24 +11526,20 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( SDValue Offset; EVT PtrType = NewPtr.getValueType(); MachinePointerInfo MPI; + SDLoc DL(EVE); if (auto *ConstEltNo = dyn_cast(EltNo)) { int Elt = ConstEltNo->getZExtValue(); unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; - if (TLI.isBigEndian()) - PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff; - Offset = DAG.getConstant(PtrOff, PtrType); + Offset = DAG.getConstant(PtrOff, DL, PtrType); MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); } else { + Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType); Offset = DAG.getNode( - ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo, - DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType())); - if (TLI.isBigEndian()) - Offset = DAG.getNode( - ISD::SUB, SDLoc(EVE), EltNo.getValueType(), - DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset); + ISD::MUL, DL, PtrType, Offset, + DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType)); MPI = OriginalLoad->getPointerInfo(); } - NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset); + NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset); // The replacement we need to do here is a little tricky: we need to // replace an extractelement of a load with a load. @@ -11223,9 +11648,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // scalar_to_vector here as well. if (!LegalOperations) { - EVT IndexTy = TLI.getVectorIdxTy(); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, - SVInVec, DAG.getConstant(OrigElt, IndexTy)); + EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, + DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); } } @@ -11314,7 +11739,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; - EltNo = DAG.getConstant(Elt, EltNo.getValueType()); + EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType()); } } @@ -11400,11 +11825,11 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { if (!ValidTypes) return SDValue(); - bool isLE = TLI.isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): - DAG.getConstant(0, SourceType); + DAG.getConstant(0, SDLoc(N), SourceType); unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); SmallVector Ops(NewBVElems, Filler); @@ -11549,10 +11974,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (Op.getOpcode() == ISD::UNDEF) continue; // See if we can combine this build_vector into a blend with a zero vector. - if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant && - cast(Op.getNode())->isNullValue()) || - (Op.getOpcode() == ISD::ConstantFP && - cast(Op.getNode())->getValueAPF().isZero()))) { + if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) { UsesZeroVector = true; continue; } @@ -11657,17 +12079,20 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // Try to replace VecIn1 with two extract_subvectors // No need to update the masks, they should still be correct. - VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy())); - VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(0, TLI.getVectorIdxTy())); + VecIn2 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(VT.getVectorNumElements(), dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + VecIn1 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } else return SDValue(); } if (UsesZeroVector) - VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) : - DAG.getConstantFP(0.0, VT); + VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) : + DAG.getConstantFP(0.0, dl, VT); else // If VecIn2 is unused then change it to undef. VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); @@ -11688,6 +12113,68 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { return SDValue(); } +static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT OpVT = N->getOperand(0).getValueType(); + + // If the operands are legal vectors, leave them alone. + if (TLI.isTypeLegal(OpVT)) + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + SmallVector Ops; + + EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + + // Keep track of what we encounter. + bool AnyInteger = false; + bool AnyFP = false; + for (const SDValue &Op : N->ops()) { + if (ISD::BITCAST == Op.getOpcode() && + !Op.getOperand(0).getValueType().isVector()) + Ops.push_back(Op.getOperand(0)); + else if (ISD::UNDEF == Op.getOpcode()) + Ops.push_back(ScalarUndef); + else + return SDValue(); + + // Note whether we encounter an integer or floating point scalar. + // If it's neither, bail out, it could be something weird like x86mmx. + EVT LastOpVT = Ops.back().getValueType(); + if (LastOpVT.isFloatingPoint()) + AnyFP = true; + else if (LastOpVT.isInteger()) + AnyInteger = true; + else + return SDValue(); + } + + // If any of the operands is a floating point scalar bitcast to a vector, + // use floating point types throughout, and bitcast everything. + // Replace UNDEFs by another scalar UNDEF node, of the final desired type. + if (AnyFP) { + SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); + ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + if (AnyInteger) { + for (SDValue &Op : Ops) { + if (Op.getValueType() == SVT) + continue; + if (Op.getOpcode() == ISD::UNDEF) + Op = ScalarUndef; + else + Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op); + } + } + } + + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, + VT.getSizeInBits() / SVT.getSizeInBits()); + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); +} + SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector @@ -11703,9 +12190,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); - // Optimize concat_vectors where one of the vectors is undef. - if (N->getNumOperands() == 2 && - N->getOperand(1)->getOpcode() == ISD::UNDEF) { + // Optimize concat_vectors where all but the first of the vectors are undef. + if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { + return Op.getOpcode() == ISD::UNDEF; + })) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); @@ -11713,6 +12201,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (In->getOpcode() == ISD::BITCAST && !In->getOperand(0)->getValueType(0).isVector()) { SDValue Scalar = In->getOperand(0); + + // If the bitcast type isn't legal, it might be a trunc of a legal type; + // look through the trunc so we can still do the transform: + // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) + if (Scalar->getOpcode() == ISD::TRUNCATE && + !TLI.isTypeLegal(Scalar.getValueType()) && + TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) + Scalar = Scalar->getOperand(0); + EVT SclTy = Scalar->getValueType(0); if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) @@ -11780,6 +12277,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } + // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. + if (SDValue V = combineConcatVectorOfScalars(N, DAG)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -11841,7 +12342,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // type. if (V->getOperand(0).getValueType() != NVT) return SDValue(); - unsigned Idx = dyn_cast(N->getOperand(1))->getZExtValue(); + unsigned Idx = N->getConstantOperandVal(1); unsigned NumElems = NVT.getVectorNumElements(); assert((Idx % NumElems) == 0 && "IDX in concat is not a multiple of the result vector length."); @@ -12462,6 +12963,16 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold (fp_to_fp16 (fp16_to_fp op)) -> op + if (N0->getOpcode() == ISD::FP16_TO_FP) + return N0->getOperand(0); + + return SDValue(); +} + /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> @@ -12472,7 +12983,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { SDValue RHS = N->getOperand(1); SDLoc dl(N); - // Make sure we're not running after operation legalization where it + // Make sure we're not running after operation legalization where it // may have custom lowered the vector shuffles. if (LegalOperations) return SDValue(); @@ -12489,12 +13000,9 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { for (unsigned i = 0; i != NumElts; ++i) { SDValue Elt = RHS.getOperand(i); - if (!isa(Elt)) - return SDValue(); - - if (cast(Elt)->isAllOnesValue()) + if (isAllOnesConstant(Elt)) Indices.push_back(i); - else if (cast(Elt)->isNullValue()) + else if (isNullConstant(Elt)) Indices.push_back(NumElts+i); else return SDValue(); @@ -12508,8 +13016,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { // Return the new VECTOR_SHUFFLE node. EVT EltVT = RVT.getVectorElementType(); SmallVector ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps); + DAG.getConstant(0, dl, EltVT)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps); LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); @@ -12546,10 +13054,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // Can't fold divide by zero. if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || N->getOpcode() == ISD::FDIV) { - if ((RHSOp.getOpcode() == ISD::Constant && - cast(RHSOp.getNode())->isNullValue()) || - (RHSOp.getOpcode() == ISD::ConstantFP && - cast(RHSOp.getNode())->getValueAPF().isZero())) + if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP && + cast(RHSOp.getNode())->isZero())) break; } @@ -12641,6 +13147,38 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { + // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) + // The select + setcc is redundant, because fsqrt returns NaN for X < -0. + if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) { + if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) { + // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?)) + SDValue Sqrt = RHS; + ISD::CondCode CC; + SDValue CmpLHS; + const ConstantFPSDNode *NegZero = nullptr; + + if (TheSelect->getOpcode() == ISD::SELECT_CC) { + CC = dyn_cast(TheSelect->getOperand(4))->get(); + CmpLHS = TheSelect->getOperand(0); + NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1)); + } else { + // SELECT or VSELECT + SDValue Cmp = TheSelect->getOperand(0); + if (Cmp.getOpcode() == ISD::SETCC) { + CC = dyn_cast(Cmp.getOperand(2))->get(); + CmpLHS = Cmp.getOperand(0); + NegZero = isConstOrConstSplatFP(Cmp.getOperand(1)); + } + } + if (NegZero && NegZero->isNegative() && NegZero->isZero() && + Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT || + CC == ISD::SETULT || CC == ISD::SETLT)) { + // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) + CombineTo(TheSelect, Sqrt); + return true; + } + } + } // Cannot simplify select with vector condition if (TheSelect->getOperand(0).getValueType().isVector()) return false; @@ -12662,6 +13200,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if (LHS.getOperand(0) != RHS.getOperand(0) || // Do not let this transformation reduce the number of volatile loads. LLD->isVolatile() || RLD->isVolatile() || + // FIXME: If either is a pre/post inc/dec load, + // we'd need to split out the address adjustment. + LLD->isIndexed() || RLD->isIndexed() || // If this is an EXTLOAD, the VT's must match. LLD->getMemoryVT() != RLD->getMemoryVT() || // If this is an EXTLOAD, the kind of extension must match. @@ -12763,25 +13304,22 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, EVT VT = N2.getValueType(); ConstantSDNode *N1C = dyn_cast(N1.getNode()); ConstantSDNode *N2C = dyn_cast(N2.getNode()); - ConstantSDNode *N3C = dyn_cast(N3.getNode()); // Determine if the condition we're dealing with is constant SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, DL, false); if (SCC.getNode()) AddToWorklist(SCC.getNode()); - ConstantSDNode *SCCC = dyn_cast_or_null(SCC.getNode()); - // fold select_cc true, x, y -> x - if (SCCC && !SCCC->isNullValue()) - return N2; - // fold select_cc false, x, y -> y - if (SCCC && SCCC->isNullValue()) - return N3; + if (ConstantSDNode *SCCC = dyn_cast_or_null(SCC.getNode())) { + // fold select_cc true, x, y -> x + // fold select_cc false, x, y -> y + return !SCCC->isNullValue() ? N2 : N3; + } // Check to see if we can simplify the select into an fabs node if (ConstantFPSDNode *CFP = dyn_cast(N1)) { // Allow either -0.0 or 0.0 - if (CFP->getValueAPF().isZero()) { + if (CFP->isZero()) { // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs if ((CC == ISD::SETGE || CC == ISD::SETGT) && N0 == N2 && N3.getOpcode() == ISD::FNEG && @@ -12819,19 +13357,20 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, const_cast(TV->getConstantFPValue()) }; Type *FPTy = Elts[0]->getType(); - const DataLayout &TD = *TLI.getDataLayout(); + const DataLayout &TD = DAG.getDataLayout(); // Create a ConstantArray of the two constants. Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); - SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), - TD.getPrefTypeAlignment(FPTy)); + SDValue CPIdx = + DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), + TD.getPrefTypeAlignment(FPTy)); unsigned Alignment = cast(CPIdx)->getAlignment(); // Get the offsets to the 0 and 1 element of the array so that we can // select between them. - SDValue Zero = DAG.getIntPtrConstant(0); + SDValue Zero = DAG.getIntPtrConstant(0, DL); unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); - SDValue One = DAG.getIntPtrConstant(EltSize); + SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), @@ -12846,24 +13385,23 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, false, Alignment); - } } // Check to see if we can perform the "gzip trick", transforming // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) - if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && - (N1C->isNullValue() || // (a < 0) ? b : 0 - (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 + if (isNullConstant(N3) && CC == ISD::SETLT && + (isNullConstant(N1) || // (a < 0) ? b : 0 + (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0 EVT XType = N0.getValueType(); EVT AType = N2.getValueType(); if (XType.bitsGE(AType)) { // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a // single-bit constant. - if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) { + if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { unsigned ShCtV = N2C->getAPIntValue().logBase2(); - ShCtV = XType.getSizeInBits()-ShCtV-1; - SDValue ShCt = DAG.getConstant(ShCtV, + ShCtV = XType.getSizeInBits() - ShCtV - 1; + SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0), getShiftAmountTy(N0.getValueType())); SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, ShCt); @@ -12879,7 +13417,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, + DAG.getConstant(XType.getSizeInBits() - 1, + SDLoc(N0), getShiftAmountTy(N0.getValueType()))); AddToWorklist(Shift.getNode()); @@ -12899,23 +13438,21 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // single bit-test can be materialized as an all-ones register with // shift-left and shift-right-arith. if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && - N0->getValueType(0) == VT && - N1C && N1C->isNullValue() && - N2C && N2C->isNullValue()) { + N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { SDValue AndLHS = N0->getOperand(0); ConstantSDNode *ConstAndRHS = dyn_cast(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { // Shift the tested bit over the sign bit. APInt AndMask = ConstAndRHS->getAPIntValue(); SDValue ShlAmt = - DAG.getConstant(AndMask.countLeadingZeros(), + DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), getShiftAmountTy(AndLHS.getValueType())); SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); // Now arithmetic right shift it all the way over, so the result is either // all-ones, or zero. SDValue ShrAmt = - DAG.getConstant(AndMask.getBitWidth()-1, + DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), getShiftAmountTy(Shl.getValueType())); SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); @@ -12924,13 +13461,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, } // fold select C, 16, 0 -> shl C, 4 - if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && + if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() && TLI.getBooleanContents(N0.getValueType()) == TargetLowering::ZeroOrOneBooleanContent) { // If the caller doesn't want us to simplify this into a zext of a compare, // don't do it. - if (NotExtCompare && N2C->getAPIntValue() == 1) + if (NotExtCompare && N2C->isOne()) return SDValue(); // Get a SetCC of the condition @@ -12958,13 +13495,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, AddToWorklist(SCC.getNode()); AddToWorklist(Temp.getNode()); - if (N2C->getAPIntValue() == 1) + if (N2C->isOne()) return Temp; // shl setcc result by log2 n2c return DAG.getNode( ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), + DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp), getShiftAmountTy(Temp.getValueType()))); } } @@ -12972,7 +13509,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Check to see if this is the equivalent of setcc // FIXME: Turn all of these into setcc if setcc if setcc is legal // otherwise, go ahead with the folds. - if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { + if (0 && isNullConstant(N3) && isOneConstant(N2)) { EVT XType = N0.getValueType(); if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) { @@ -12983,30 +13520,34 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, } // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) - if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && + if (isNullConstant(N1) && CC == ISD::SETEQ && (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, XType))) { SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0); return DAG.getNode(ISD::SRL, DL, XType, Ctlz, DAG.getConstant(Log2_32(XType.getSizeInBits()), + SDLoc(Ctlz), getShiftAmountTy(Ctlz.getValueType()))); } // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) - if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { - SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0), - XType, DAG.getConstant(0, XType), N0); - SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType); + if (isNullConstant(N1) && CC == ISD::SETGT) { + SDLoc DL(N0); + SDValue NegN0 = DAG.getNode(ISD::SUB, DL, + XType, DAG.getConstant(0, DL, XType), N0); + SDValue NotN0 = DAG.getNOT(DL, N0, XType); return DAG.getNode(ISD::SRL, DL, XType, DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), - DAG.getConstant(XType.getSizeInBits()-1, + DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(XType))); } // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) - if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { - SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, + if (isAllOnesConstant(N1) && CC == ISD::SETGT) { + SDLoc DL(N0); + SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0, + DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); - return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); + return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL, + XType)); } } @@ -13029,11 +13570,12 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, EVT XType = N0.getValueType(); if (SubC && SubC->isNullValue() && XType.isInteger()) { - SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, + SDLoc DL(N0); + SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, + DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); - SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), + SDValue Add = DAG.getNode(ISD::ADD, DL, XType, N0, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -13063,7 +13605,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { return SDValue(); // Avoid division by zero. - if (!C->getAPIntValue()) + if (C->isNullValue()) return SDValue(); std::vector Built; @@ -13083,7 +13625,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { return SDValue(); // Avoid division by zero. - if (!C->getAPIntValue()) + if (C->isNullValue()) return SDValue(); std::vector Built; @@ -13104,7 +13646,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return SDValue(); // Avoid division by zero. - if (!C->getAPIntValue()) + if (C->isNullValue()) return SDValue(); std::vector Built; @@ -13134,7 +13676,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { // does not require additional intermediate precision] EVT VT = Op.getValueType(); SDLoc DL(Op); - SDValue FPOne = DAG.getConstantFP(1.0, VT); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); AddToWorklist(Est.getNode()); @@ -13169,7 +13711,7 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); - SDValue ThreeHalves = DAG.getConstantFP(1.5, VT); + SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. @@ -13205,8 +13747,8 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); - SDValue MinusThree = DAG.getConstantFP(-3.0, VT); - SDValue MinusHalf = DAG.getConstantFP(-0.5, VT); + SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); + SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT); // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { @@ -13362,14 +13904,12 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { Op0->getSrcValueOffset() - MinOffset; int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) + Op1->getSrcValueOffset() - MinOffset; - AliasAnalysis::AliasResult AAResult = - AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(), - Overlap1, - UseTBAA ? Op0->getAAInfo() : AAMDNodes()), - AliasAnalysis::Location(Op1->getMemOperand()->getValue(), - Overlap2, - UseTBAA ? Op1->getAAInfo() : AAMDNodes())); - if (AAResult == AliasAnalysis::NoAlias) + AliasResult AAResult = + AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1, + UseTBAA ? Op0->getAAInfo() : AAMDNodes()), + MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2, + UseTBAA ? Op1->getAAInfo() : AAMDNodes())); + if (AAResult == NoAlias) return false; } @@ -13395,8 +13935,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, // aliases list. If not, then continue up the chain looking for the next // candidate. while (!Chains.empty()) { - SDValue Chain = Chains.back(); - Chains.pop_back(); + SDValue Chain = Chains.pop_back_val(); // For TokenFactor nodes, look at each operand and only continue up the // chain until we find two aliases. If we've seen two aliases, assume we'll @@ -13503,7 +14042,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, UIE = M->use_end(); UI != UIE; ++UI) if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI).second) { - if (isa(*UI) || isa(*UI)) { + if (isa(*UI)) { // We've not visited this use, and we care about it (it could have an // ordering dependency with the original node). Aliases.clear();