X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FDAGCombiner.cpp;h=bec5241ea921c40da67c319461616da9d6c3d1a1;hb=8acb3100de9cfc02048b7ab23490134ed735b051;hp=e874f1b1ec18afec70234be1cc56c9af04f36299;hpb=97121ba2afb8d566ff1bf5c4e8fc5d4077940a7f;p=oota-llvm.git diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e874f1b1ec1..bec5241ea92 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10,11 +10,16 @@ // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run // both before and after the DAG is legalized. // +// This pass is not a substitute for the LLVM IR instcombine pass. This pass is +// primarily intended to handle simplification opportunities that are implicit +// in the LLVM IR and exposed by the various codegen lowering phases. +// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "dagcombine" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -26,17 +31,18 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include -#include using namespace llvm; STATISTIC(NodesCombined , "Number of dag nodes combined"); STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); +STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); namespace { static cl::opt @@ -49,13 +55,13 @@ namespace { //------------------------------ DAGCombiner ---------------------------------// - class VISIBILITY_HIDDEN DAGCombiner { + class DAGCombiner { SelectionDAG &DAG; const TargetLowering &TLI; CombineLevel Level; + CodeGenOpt::Level OptLevel; bool LegalOperations; bool LegalTypes; - bool Fast; // Worklist of all of the nodes that need to be simplified. std::vector WorkList; @@ -113,7 +119,8 @@ namespace { /// it can be simplified or if things it uses can be simplified by bit /// propagation. If so, return true. bool SimplifyDemandedBits(SDValue Op) { - APInt Demanded = APInt::getAllOnesValue(Op.getValueSizeInBits()); + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + APInt Demanded = APInt::getAllOnesValue(BitWidth); return SimplifyDemandedBits(Op, Demanded); } @@ -208,16 +215,17 @@ namespace { SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); - SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, + SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, DebugLoc DL, bool foldBooleans = true); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); - SDValue CombineConsecutiveLoads(SDNode *N, MVT VT); - SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT); + SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); + SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildUDIV(SDNode *N); SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); SDValue ReduceLoadWidth(SDNode *N); + SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -230,14 +238,17 @@ namespace { /// overlap. bool isAlias(SDValue Ptr1, int64_t Size1, const Value *SrcValue1, int SrcValueOffset1, + unsigned SrcValueAlign1, SDValue Ptr2, int64_t Size2, - const Value *SrcValue2, int SrcValueOffset2) const; + const Value *SrcValue2, int SrcValueOffset2, + unsigned SrcValueAlign2) const; /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. bool FindAliasInfo(SDNode *N, SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, int &SrcValueOffset) const; + const Value *&SrcValue, int &SrcValueOffset, + unsigned &SrcValueAlignment) const; /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, /// looking for a better chain (aliasing node.) @@ -245,18 +256,18 @@ namespace { /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. - MVT getShiftAmountTy() { + EVT getShiftAmountTy() { return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); } public: - DAGCombiner(SelectionDAG &D, AliasAnalysis &A, bool fast) + DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted), + OptLevel(OL), LegalOperations(false), LegalTypes(false), - Fast(fast), AA(A) {} /// Run - runs the dag combiner on all nodes in the work list @@ -268,8 +279,7 @@ public: namespace { /// WorkListRemover - This class is a DAGUpdateListener that removes any deleted /// nodes from the worklist. -class VISIBILITY_HIDDEN WorkListRemover : - public SelectionDAG::DAGUpdateListener { +class WorkListRemover : public SelectionDAG::DAGUpdateListener { DAGCombiner &DC; public: explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {} @@ -386,7 +396,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); switch (Op.getOpcode()) { - default: assert(0 && "Unknown code"); + default: llvm_unreachable("Unknown code"); case ISD::ConstantFP: { APFloat V = cast(Op)->getValueAPF(); V.changeSign(); @@ -489,7 +499,7 @@ static bool isOneUseSetCC(SDValue N) { SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, SDValue N0, SDValue N1) { - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc && isa(N0.getOperand(1))) { if (isa(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) @@ -531,11 +541,14 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo) { assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); ++NodesCombined; - DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(To[0].getNode()->dump(&DAG)); - DOUT << " and " << NumTo-1 << " other values\n"; - DEBUG(for (unsigned i = 0, e = NumTo; i != e; ++i) - assert(N->getValueType(i) == To[i].getValueType() && + DEBUG(errs() << "\nReplacing.1 "; + N->dump(&DAG); + errs() << "\nWith: "; + To[0].getNode()->dump(&DAG); + errs() << " and " << NumTo-1 << " other values\n"; + for (unsigned i = 0, e = NumTo; i != e; ++i) + assert((!To[i].getNode() || + N->getValueType(i) == To[i].getValueType()) && "Cannot combine value to value of different type!")); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesWith(N, To, &DeadNodes); @@ -606,9 +619,11 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { // Replace the old value with the new one. ++NodesCombined; - DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << "\nReplacing.2 "; + TLO.Old.getNode()->dump(&DAG); + errs() << "\nWith: "; + TLO.New.getNode()->dump(&DAG); + errs() << '\n'); CommitTargetLoweringOpt(TLO); return true; @@ -674,9 +689,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) { RV.getNode()->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(RV.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << "\nReplacing.3 "; + N->dump(&DAG); + errs() << "\nWith: "; + RV.getNode()->dump(&DAG); + errs() << '\n'); WorkListRemover DeadNodes(*this); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); @@ -794,7 +811,7 @@ SDValue DAGCombiner::combine(SDNode *N) { // Expose the DAG combiner to the target combiner impls. TargetLowering::DAGCombinerInfo - DagCombineInfo(DAG, Level == Unrestricted, false, this); + DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); RV = TLI.PerformDAGCombine(N, DagCombineInfo); } @@ -871,7 +888,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { break; case ISD::TokenFactor: - if ((CombinerAA || Op.hasOneUse()) && + if (Op.hasOneUse() && std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { // Queue up for processing. TFs.push_back(Op.getNode()); @@ -892,7 +909,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { } } } - + SDValue Result; // If we've change things around then replace token factor. @@ -916,9 +933,14 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { /// MERGE_VALUES can always be eliminated. SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { WorkListRemover DeadNodes(*this); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), - &DeadNodes); + // Replacing results may cause a different MERGE_VALUES to suddenly + // be CSE'd with N, and carry its uses with it. Iterate until no + // uses remain, to ensure that the node can be safely deleted. + do { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), + &DeadNodes); + } while (!N->use_empty()); removeFromWorkList(N); DAG.DeleteNode(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -927,7 +949,7 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { static SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, SelectionDAG &DAG) { - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); ConstantSDNode *N01C = dyn_cast(N01); @@ -951,7 +973,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1074,7 +1096,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. if (N->hasNUsesOfValue(0, 1)) @@ -1136,7 +1158,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0.getNode()); ConstantSDNode *N1C = dyn_cast(N1.getNode()); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1209,7 +1231,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1302,7 +1324,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0.getNode()); ConstantSDNode *N1C = dyn_cast(N1.getNode()); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -1389,7 +1411,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0.getNode()); ConstantSDNode *N1C = dyn_cast(N1.getNode()); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -1409,7 +1431,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = dyn_cast(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { - MVT ADDVT = N1.getOperand(1).getValueType(); + EVT ADDVT = N1.getOperand(1).getValueType(); SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() @@ -1441,7 +1463,7 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (srem c1, c2) -> c1%c2 if (N0C && N1C && !N1C->isNullValue()) @@ -1483,7 +1505,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (urem c1, c2) -> c1%c2 if (N0C && N1C && !N1C->isNullValue()) @@ -1535,7 +1557,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (mulhs x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1556,7 +1578,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (mulhu x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1659,7 +1681,7 @@ SDValue DAGCombiner::visitUDIVREM(SDNode *N) { /// two operands of the same opcode, try to simplify it. SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); // For each of OP in AND/OR/XOR: @@ -1667,11 +1689,17 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) + // + // do not sink logical op inside of a vector extend, since it may combine + // into a vsetcc. if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND|| N0.getOpcode() == ISD::SIGN_EXTEND || (N0.getOpcode() == ISD::TRUNCATE && !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) && - N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) { + !VT.isVector() && + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && + (!LegalOperations || + TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) { SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); @@ -1703,7 +1731,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue LL, LR, RL, RR, CC0, CC1; ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N1.getValueType(); + EVT VT = N1.getValueType(); unsigned BitWidth = VT.getSizeInBits(); // fold vector ops @@ -1814,18 +1842,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (zext_inreg (extload x)) -> (zextload x) if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. unsigned BitWidth = N1.getValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getSizeInBits())) && + BitWidth - MemVT.getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); @@ -1836,18 +1864,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. unsigned BitWidth = N1.getValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getSizeInBits())) && + BitWidth - MemVT.getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); @@ -1863,24 +1891,24 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LN0->isUnindexed() && N0.hasOneUse() && // Do not change the width of a volatile load. !LN0->isVolatile()) { - MVT EVT = MVT::Other; + EVT ExtVT = MVT::Other; uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())) - EVT = MVT::getIntegerVT(ActiveBits); + ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - MVT LoadedVT = LN0->getMemoryVT(); + EVT LoadedVT = LN0->getMemoryVT(); // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). - if (EVT != MVT::Other && LoadedVT.bitsGT(EVT) && EVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { - MVT PtrType = N0.getOperand(1).getValueType(); + if (ExtVT != MVT::Other && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + EVT PtrType = N0.getOperand(1).getValueType(); // For big endian targets, we need to add an offset to the pointer to // load the correct bytes. For little endian systems, we merely need to // read fewer bytes from the same pointer. - unsigned LVTStoreBytes = LoadedVT.getStoreSizeInBits()/8; - unsigned EVTStoreBytes = EVT.getStoreSizeInBits()/8; + unsigned LVTStoreBytes = LoadedVT.getStoreSize(); + unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; unsigned Alignment = LN0->getAlignment(); SDValue NewPtr = LN0->getBasePtr(); @@ -1895,7 +1923,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue Load = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - EVT, LN0->isVolatile(), Alignment); + ExtVT, LN0->isVolatile(), Alignment); AddToWorkList(N); CombineTo(N0.getNode(), Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1912,7 +1940,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { SDValue LL, LR, RL, RR, CC0, CC1; ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N1.getValueType(); + EVT VT = N1.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1921,8 +1949,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } // fold (or x, undef) -> -1 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(~0ULL, VT); + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) { + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + } // fold (or c1, c2) -> c1|c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); @@ -2052,7 +2082,7 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { // a rot[lr]. SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. - MVT VT = LHS.getValueType(); + EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) return 0; // The target must have at least one rotate flavor. @@ -2213,7 +2243,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue LHS, RHS, CC; ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -2252,8 +2282,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) { switch (N0.getOpcode()) { default: - assert(0 && "Unhandled SetCC Equivalent!"); - abort(); + llvm_unreachable("Unhandled SetCC Equivalent!"); case ISD::SETCC: return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC); case ISD::SELECT_CC: @@ -2382,7 +2411,7 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { !isa(BinOpLHSVal->getOperand(1))) return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // If this is a signed shift right, and the high bit is modified by the // logical operation, do not perform the transformation. The highBitSet @@ -2412,8 +2441,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getSizeInBits(); + EVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold (shl c1, c2) -> c1< (shl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && @@ -2437,7 +2466,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { SDValue N101 = N1.getOperand(0).getOperand(1); if (ConstantSDNode *N101C = dyn_cast(N101)) { - MVT TruncVT = N1.getValueType(); + EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC.trunc(TruncVT.getSizeInBits()); @@ -2468,20 +2497,33 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::SRL && N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast(N0.getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, N0.getOperand(0), - DAG.getConstant(~0ULL << c1, VT)); - if (c2 > c1) - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask, - DAG.getConstant(c2-c1, N1.getValueType())); - else - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask, - DAG.getConstant(c1-c2, N1.getValueType())); + if (c1 < VT.getSizeInBits()) { + uint64_t c2 = N1C->getZExtValue(); + SDValue HiBitsMask = + DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), + VT.getSizeInBits() - c1), + VT); + SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, + N0.getOperand(0), + HiBitsMask); + if (c2 > c1) + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask, + DAG.getConstant(c2-c1, N1.getValueType())); + else + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask, + DAG.getConstant(c1-c2, N1.getValueType())); + } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) - if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) + if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { + SDValue HiBitsMask = + DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), + VT.getSizeInBits() - + N1C->getZExtValue()), + VT); return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), - DAG.getConstant(~0ULL << N1C->getZExtValue(), VT)); + HiBitsMask); + } return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); } @@ -2491,7 +2533,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold (sra c1, c2) -> (sra c1, c2) if (N0C && N1C) @@ -2503,7 +2546,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N0C && N0C->isAllOnesValue()) return N0; // fold (sra x, (setge c, size(x))) -> undef - if (N1C && N1C->getZExtValue() >= VT.getSizeInBits()) + if (N1C && N1C->getZExtValue() >= OpSizeInBits) return DAG.getUNDEF(VT); // fold (sra x, 0) -> x if (N1C && N1C->isNullValue()) @@ -2511,8 +2554,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports // sext_inreg. if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { - unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue(); - MVT EVT = MVT::getIntegerVT(LowBits); + unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); + EVT EVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT))) return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getValueType(EVT)); @@ -2522,7 +2565,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N1C && N0.getOpcode() == ISD::SRA) { if (ConstantSDNode *C1 = dyn_cast(N0.getOperand(1))) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); - if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1; + if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1; return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(Sum, N1C->getValueType(0))); } @@ -2538,17 +2581,16 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { const ConstantSDNode *N01C = dyn_cast(N0.getOperand(1)); if (N01C && N1C) { // Determine what the truncate's result bitsize and type would be. - unsigned VTValSize = VT.getSizeInBits(); - MVT TruncVT = - MVT::getIntegerVT(VTValSize - N1C->getZExtValue()); + EVT TruncVT = + EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue()); // Determine the residual right-shift amount. - unsigned ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); + signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); // If the shift is not a no-op (in which case this should be just a sign // extend already), the truncated to type is legal, sign_extend is legal // on that type, and the the truncate to that type is both legal and free, // perform the transform. - if (ShiftAmt && + if ((ShiftAmt > 0) && TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && TLI.isTruncateFree(VT, TruncVT)) { @@ -2570,10 +2612,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { SDValue N101 = N1.getOperand(0).getOperand(1); if (ConstantSDNode *N101C = dyn_cast(N101)) { - MVT TruncVT = N1.getValueType(); + EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); - TruncC.trunc(TruncVT.getSizeInBits()); + TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, @@ -2601,8 +2643,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getSizeInBits(); + EVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold (srl c1, c2) -> c1 >>u c2 if (N0C && N1C) @@ -2635,7 +2677,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? - MVT SmallVT = N0.getOperand(0).getValueType(); + EVT SmallVT = N0.getOperand(0).getValueType(); if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) return DAG.getUNDEF(VT); @@ -2694,7 +2736,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { SDValue N101 = N1.getOperand(0).getOperand(1); if (ConstantSDNode *N101C = dyn_cast(N101)) { - MVT TruncVT = N1.getValueType(); + EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC.trunc(TruncVT.getSizeInBits()); @@ -2718,7 +2760,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (ctlz c1) -> c2 if (isa(N0)) @@ -2728,7 +2770,7 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (cttz c1) -> c2 if (isa(N0)) @@ -2738,7 +2780,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (ctpop c1) -> c2 if (isa(N0)) @@ -2753,8 +2795,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); ConstantSDNode *N2C = dyn_cast(N2); - MVT VT = N->getValueType(0); - MVT VT0 = N0.getValueType(); + EVT VT = N->getValueType(0); + EVT VT0 = N0.getValueType(); // fold (select C, X, X) -> X if (N1 == N2) @@ -2819,7 +2861,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // Check against MVT::Other for SELECT_CC, which is a workaround for targets // having to say they don't support SELECT_CC on every type the DAG knows // about, since there is no way to mark an opcode illegal at all value types - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) && + TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, N0.getOperand(2)); @@ -2874,7 +2917,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { } // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: -// "fold ({s|z}ext (load x)) -> ({s|z}ext (truncate ({s|z}extload x)))" +// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" // transformation. Returns true if extension are possible and the above // mentioned transformation is profitable. static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, @@ -2889,8 +2932,10 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, SDNode *User = *UI; if (User == N) continue; + if (UI.getUse().getResNo() != N0.getResNo()) + continue; // FIXME: Only extend SETCC N, N and SETCC N, c for now. - if (User->getOpcode() == ISD::SETCC) { + if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { ISD::CondCode CC = cast(User->getOperand(2))->get(); if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) // Sign bits will be lost after a zext. @@ -2906,32 +2951,25 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, } if (Add) ExtendNodes.push_back(User); - } else { - for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { - SDValue UseOp = User->getOperand(i); - if (UseOp == N0) { - // If truncate from extended type to original load type is free - // on this target, then it's ok to extend a CopyToReg. - if (isTruncFree && User->getOpcode() == ISD::CopyToReg) - HasCopyToRegUses = true; - else - return false; - } - } + continue; } + // If truncates aren't free and there are users we can't + // extend, it isn't worthwhile. + if (!isTruncFree) + return false; + // Remember if this value is live-out. + if (User->getOpcode() == ISD::CopyToReg) + HasCopyToRegUses = true; } if (HasCopyToRegUses) { bool BothLiveOut = false; for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { - SDValue UseOp = User->getOperand(i); - if (UseOp.getNode() == N && UseOp.getResNo() == 0) { - BothLiveOut = true; - break; - } + SDUse &Use = UI.getUse(); + if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { + BothLiveOut = true; + break; } } if (BothLiveOut) @@ -2944,7 +2982,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (sext c1) -> c1 if (isa(N0)) @@ -2963,7 +3001,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (NarrowLoad.getNode()) { if (NarrowLoad.getNode() != N0.getNode()) CombineTo(N0.getNode(), NarrowLoad); - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } // See if the value being truncated is already sign extended. If so, just @@ -2999,7 +3037,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { else if (Op.getValueType().bitsGT(VT)) Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, - DAG.getValueType(N0.getValueType())); + DAG.getValueType(N0.getValueType().getScalarType())); } } @@ -3013,8 +3051,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), - VT, LN0->getChain(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), @@ -3034,8 +3072,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (SOp == Trunc) Ops.push_back(ExtLoad); else - Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), - VT, SOp)); + Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, + N->getDebugLoc(), VT, SOp)); } Ops.push_back(SetCC->getOperand(2)); @@ -3053,13 +3091,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), @@ -3070,14 +3108,34 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } - // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) if (N0.getOpcode() == ISD::SETCC) { + // sext(setcc) -> sext_in_reg(vsetcc) for vectors. + if (VT.isVector() && + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. + VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() && + + // Only do this before legalize for now. + !LegalOperations) { + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast(N0.getOperand(2))->get()); + } + + // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + SDValue NegOne = + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); SDValue SCC = SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT), + NegOne, DAG.getConstant(0, VT), cast(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; } + + // fold (sext x) -> (zext x) if the sign bit is known zero. if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && @@ -3089,7 +3147,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (zext c1) -> c1 if (isa(N0)) @@ -3120,7 +3178,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } else if (Op.getValueType().bitsGT(VT)) { Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); } - return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType()); + return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), + N0.getValueType().getScalarType()); } // Fold (zext (and (trunc x), cst)) -> (and x, cst), @@ -3143,6 +3202,19 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { X, DAG.getConstant(Mask, VT)); } + // Fold (zext (and x, cst)) -> (and (zext x), cst) + if (N0.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant && + N0.getOperand(0).getOpcode() != ISD::TRUNCATE && + N0.getOperand(0).hasOneUse()) { + APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); + Mask.zext(VT.getSizeInBits()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, + N0.getOperand(0)), + DAG.getConstant(Mask, VT)); + } + // fold (zext (load x)) -> (zext (truncate (zextload x))) if (ISD::isNON_EXTLoad(N0.getNode()) && ((!LegalOperations && !cast(N0)->isVolatile()) || @@ -3193,13 +3265,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT)) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), @@ -3219,12 +3291,22 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (SCC.getNode()) return SCC; } + // (zext (shl (zext x), y)) -> (shl (zext x), (zext y)) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && + N0.hasOneUse()) { + DebugLoc dl = N->getDebugLoc(); + return DAG.getNode(N0.getOpcode(), dl, VT, + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1))); + } + return SDValue(); } SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (aext c1) -> c1 if (isa(N0)) @@ -3278,26 +3360,48 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } // fold (aext (load x)) -> (aext (truncate (extload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() && + if (ISD::isNON_EXTLoad(N0.getNode()) && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { - LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, - LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), - N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); - CombineTo(N, ExtLoad); - // Redirect any chain users to the new load. - DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), - SDValue(ExtLoad.getNode(), 1)); - // If any node needs the original loaded value, recompute it. - if (!LN0->use_empty()) - CombineTo(LN0, DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), - N0.getValueType(), ExtLoad), - ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + bool DoXform = true; + SmallVector SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::ANY_EXTEND, + N->getDebugLoc(), VT, SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } } // fold (aext (zextload x)) -> (aext (truncate (zextload x))) @@ -3307,11 +3411,11 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), @@ -3377,8 +3481,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); - MVT EVT = VT; + EVT VT = N->getValueType(0); + EVT ExtVT = VT; // This transformation isn't valid for vector loads. if (VT.isVector()) @@ -3388,20 +3492,21 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // extended to VT. if (Opc == ISD::SIGN_EXTEND_INREG) { ExtType = ISD::SEXTLOAD; - EVT = cast(N->getOperand(1))->getVT(); - if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) + ExtVT = cast(N->getOperand(1))->getVT(); + if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT)) return SDValue(); } - unsigned EVTBits = EVT.getSizeInBits(); + unsigned EVTBits = ExtVT.getSizeInBits(); unsigned ShAmt = 0; - if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) { if (ConstantSDNode *N01 = dyn_cast(N0.getOperand(1))) { ShAmt = N01->getZExtValue(); // Is the shift amount a multiple of size of VT? if ((ShAmt & (EVTBits-1)) == 0) { N0 = N0.getOperand(0); - if (N0.getValueType().getSizeInBits() <= EVTBits) + // Is the load width a multiple of size of VT? + if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) return SDValue(); } } @@ -3409,18 +3514,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). - if (isa(N0) && N0.hasOneUse() && EVT.isRound() && + if (isa(N0) && N0.hasOneUse() && ExtVT.isRound() && cast(N0)->getMemoryVT().getSizeInBits() > EVTBits && // Do not change the width of a volatile load. !cast(N0)->isVolatile()) { LoadSDNode *LN0 = cast(N0); - MVT PtrType = N0.getOperand(1).getValueType(); + EVT PtrType = N0.getOperand(1).getValueType(); // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. if (TLI.isBigEndian()) { unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); - unsigned EVTStoreBits = EVT.getStoreSizeInBits(); + unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; } @@ -3437,7 +3542,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { LN0->isVolatile(), NewAlign) : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - EVT, LN0->isVolatile(), NewAlign); + ExtVT, LN0->isVolatile(), NewAlign); // Replace the old load's chain with the new load's chain. WorkListRemover DeadNodes(*this); @@ -3454,9 +3559,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - MVT VT = N->getValueType(0); - MVT EVT = cast(N1)->getVT(); - unsigned VTBits = VT.getSizeInBits(); + EVT VT = N->getValueType(0); + EVT EVT = cast(N1)->getVT(); + unsigned VTBits = VT.getScalarType().getSizeInBits(); unsigned EVTBits = EVT.getSizeInBits(); // fold (sext_in_reg c1) -> c1 @@ -3464,7 +3569,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1); // If the input is already sign extended, just drop the extension. - if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1) + if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) return N0; // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 @@ -3479,7 +3584,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // if x is small enough. if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getValueType().getSizeInBits() < EVTBits) + if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits) return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); } @@ -3503,11 +3608,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. if (N0.getOpcode() == ISD::SRL) { if (ConstantSDNode *ShAmt = dyn_cast(N0.getOperand(1))) - if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) { + if (ShAmt->getZExtValue()+EVTBits <= VTBits) { // We can turn this into an SRA iff the input to the SRL is already sign // extended enough. unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); - if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < InSignBits) + if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1)); } @@ -3550,7 +3655,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -3600,33 +3705,31 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) { /// CombineConsecutiveLoads - build_pair (load, load) -> load /// if load locations are consecutive. -SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { +SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { assert(N->getOpcode() == ISD::BUILD_PAIR); - SDNode *LD1 = getBuildPairElt(N, 0); - if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) + LoadSDNode *LD1 = dyn_cast(getBuildPairElt(N, 0)); + LoadSDNode *LD2 = dyn_cast(getBuildPairElt(N, 1)); + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) return SDValue(); - MVT LD1VT = LD1->getValueType(0); - SDNode *LD2 = getBuildPairElt(N, 1); - const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + EVT LD1VT = LD1->getValueType(0); if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && // If both are volatile this would reduce the number of volatile loads. // If one is volatile it might be ok, but play conservative and bail out. - !cast(LD1)->isVolatile() && - !cast(LD2)->isVolatile() && - TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) { - LoadSDNode *LD = cast(LD1); - unsigned Align = LD->getAlignment(); + !LD1->isVolatile() && + !LD2->isVolatile() && + DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { + unsigned Align = LD1->getAlignment(); unsigned NewAlign = TLI.getTargetData()-> - getABITypeAlignment(VT.getTypeForMVT()); + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) - return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(), - LD->getSrcValue(), LD->getSrcValueOffset(), - false, Align); + return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), + LD1->getBasePtr(), LD1->getSrcValue(), + LD1->getSrcValueOffset(), false, Align); } return SDValue(); @@ -3634,7 +3737,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // If the input is a BUILD_VECTOR with all constant elements, fold this now. // Only do this before legalize, since afterward the target may be depending @@ -3652,7 +3755,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { break; } - MVT DestEltVT = N->getValueType(0).getVectorElementType(); + EVT DestEltVT = N->getValueType(0).getVectorElementType(); assert(!DestEltVT.isVector() && "Element type of vector ValueType must not be vector!"); if (isSimple) @@ -3662,7 +3765,18 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { // If the input is a constant, let getNode fold it. if (isa(N0) || isa(N0)) { SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0); - if (Res.getNode() != N) return Res; + if (Res.getNode() != N) { + if (!LegalOperations || + TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) + return Res; + + // Folding it resulted in an illegal node, and it's too late to + // do that. Clean up the old node and forego the transformation. + // Ideally this won't happen very often, because instcombine + // and the earlier dagcombine runs (where illegal nodes are + // permitted) should have folded most of them already. + DAG.DeleteNode(Res.getNode()); + } } // (conv (conv x, t1), t2) -> (conv x, t2) @@ -3678,7 +3792,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast(N0); unsigned Align = TLI.getTargetData()-> - getABITypeAlignment(VT.getTypeForMVT()); + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); if (Align <= OrigAlign) { @@ -3721,7 +3835,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { isa(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); - MVT IntXVT = MVT::getIntegerVT(OrigXWidth); + EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (TLI.isTypeLegal(IntXVT) || !LegalTypes) { SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), IntXVT, N0.getOperand(1)); @@ -3769,7 +3883,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { } SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); return CombineConsecutiveLoads(N, VT); } @@ -3777,8 +3891,8 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { /// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the /// destination element value type. SDValue DAGCombiner:: -ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { - MVT SrcEltVT = BV->getOperand(0).getValueType(); +ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { + EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); // If this is already the right type, we're done. if (SrcEltVT == DstEltVT) return SDValue(BV, 0); @@ -3791,11 +3905,16 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { if (SrcBitSize == DstBitSize) { SmallVector Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + SDValue Op = BV->getOperand(i); + // If the vector element type is not legal, the BUILD_VECTOR operands + // are promoted and implicitly truncated. Make that explicit here. + if (Op.getValueType() != SrcEltVT) + Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op); Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), - DstEltVT, BV->getOperand(i))); + DstEltVT, Op)); AddToWorkList(Ops.back().getNode()); } - MVT VT = MVT::getVectorVT(DstEltVT, + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, BV->getValueType(0).getVectorNumElements()); return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, &Ops[0], Ops.size()); @@ -3808,7 +3927,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { // Convert the input float vector to a int vector where the elements are the // same sizes. assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); - MVT IntVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits()); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode(); SrcEltVT = IntVT; } @@ -3817,7 +3936,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { // convert to integer first, then to FP of the right size. if (DstEltVT.isFloatingPoint()) { assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); - MVT TmpVT = MVT::getIntegerVT(DstEltVT.getSizeInBits()); + EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode(); // Next, convert to FP elements of the same size. @@ -3843,8 +3962,8 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { if (Op.getOpcode() == ISD::UNDEF) continue; EltIsUndef = false; - NewBits |= - APInt(cast(Op)->getAPIntValue()).zext(DstBitSize); + NewBits |= (APInt(cast(Op)->getAPIntValue()). + zextOrTrunc(SrcBitSize).zext(DstBitSize)); } if (EltIsUndef) @@ -3853,7 +3972,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); } - MVT VT = MVT::getVectorVT(DstEltVT, Ops.size()); + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, &Ops[0], Ops.size()); } @@ -3862,7 +3981,8 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { // turns into multiple outputs. bool isS2V = ISD::isScalarToVector(BV); unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; - MVT VT = MVT::getVectorVT(DstEltVT, NumOutputsPerInput*BV->getNumOperands()); + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, + NumOutputsPerInput*BV->getNumOperands()); SmallVector Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { @@ -3872,7 +3992,8 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { continue; } - APInt OpVal = cast(BV->getOperand(i))->getAPIntValue(); + APInt OpVal = APInt(cast(BV->getOperand(i))-> + getAPIntValue()).zextOrTrunc(SrcBitSize); for (unsigned j = 0; j != NumOutputsPerInput; ++j) { APInt ThisVal = APInt(OpVal).trunc(DstBitSize); @@ -3898,7 +4019,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -3939,7 +4060,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -3973,7 +4094,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -3990,10 +4111,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul A, 0) -> 0 if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) return N1; + // fold (fmul A, 0) -> 0, vector edition. + if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode())) + return N1; // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); - // fold (fmul X, (fneg 1.0)) -> (fneg X) + // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); @@ -4025,7 +4149,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -4058,7 +4182,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP && VT != MVT::ppcf128) @@ -4072,7 +4196,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); @@ -4120,8 +4244,8 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantSDNode *N0C = dyn_cast(N0); - MVT VT = N->getValueType(0); - MVT OpVT = N0.getValueType(); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128) @@ -4142,8 +4266,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantSDNode *N0C = dyn_cast(N0); - MVT VT = N->getValueType(0); - MVT OpVT = N0.getValueType(); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128) @@ -4164,7 +4288,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fp_to_sint c1fp) -> c1 if (N0CFP) @@ -4176,7 +4300,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fp_to_uint c1fp) -> c1 if (N0CFP && VT != MVT::ppcf128) @@ -4189,7 +4313,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fp_round c1fp) -> c1fp if (N0CFP && N0.getValueType() != MVT::ppcf128) @@ -4222,8 +4346,8 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); - MVT EVT = cast(N->getOperand(1))->getVT(); + EVT VT = N->getValueType(0); + EVT EVT = cast(N->getOperand(1))->getVT(); ConstantFPSDNode *N0CFP = dyn_cast(N0); // fold (fp_round_inreg c1fp) -> c1fp @@ -4238,7 +4362,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. if (N->hasOneUse() && @@ -4285,23 +4409,25 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); if (isNegatibleForFree(N0, LegalOperations)) return GetNegatedExpression(N0, DAG, LegalOperations); // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() && - N0.getOperand(0).getValueType().isInteger() && - !N0.getOperand(0).getValueType().isVector()) { + if (N0.getOpcode() == ISD::BIT_CONVERT && + !VT.isVector() && + N0.getNode()->hasOneUse() && + N0.getOperand(0).getValueType().isInteger()) { SDValue Int = N0.getOperand(0); - MVT IntVT = Int.getValueType(); + EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), - N->getValueType(0), Int); + VT, Int); } } @@ -4311,7 +4437,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fabs c1) -> fabs(c1) if (N0CFP && VT != MVT::ppcf128) @@ -4330,7 +4456,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); - MVT IntVT = Int.getValueType(); + EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); @@ -4347,14 +4473,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); - ConstantSDNode *N1C = dyn_cast(N1); - // never taken branch, fold to chain - if (N1C && N1C->isNullValue()) - return Chain; - // unconditional branch - if (N1C && N1C->getAPIntValue() == 1) - return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2); + // If N is a constant we could fold this into a fallthrough or unconditional + // branch. However that doesn't happen very often in normal code, because + // Instcombine/SimplifyCFG should have handled the available opportunities. + // If we did this folding here, it would be necessary to update the + // MachineBasicBlock CFG, which is awkward. + // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal // on the target. if (N1.getOpcode() == ISD::SETCC && @@ -4388,7 +4513,6 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && Op1.getOpcode() == ISD::Constant) { - SDValue AndOp0 = Op0.getOperand(0); SDValue AndOp1 = Op0.getOperand(1); if (AndOp1.getOpcode() == ISD::Constant) { @@ -4422,22 +4546,18 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { CondCodeSDNode *CC = cast(N->getOperand(1)); SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); + // If N is a constant we could fold this into a fallthrough or unconditional + // branch. However that doesn't happen very often in normal code, because + // Instcombine/SimplifyCFG should have handled the available opportunities. + // If we did this folding here, it would be necessary to update the + // MachineBasicBlock CFG, which is awkward. + // Use SimplifySetCC to simplify SETCC's. SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), CondLHS, CondRHS, CC->get(), N->getDebugLoc(), false); if (Simp.getNode()) AddToWorkList(Simp.getNode()); - ConstantSDNode *SCCC = dyn_cast_or_null(Simp.getNode()); - - // fold br_cc true, dest -> br dest (unconditional branch) - if (SCCC && !SCCC->isNullValue()) - return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, - N->getOperand(0), N->getOperand(4)); - // fold br_cc false, dest -> unconditional fall through - if (SCCC && SCCC->isNullValue()) - return N->getOperand(0); - // fold to a simpler setcc if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, @@ -4460,7 +4580,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { bool isLoad = true; SDValue Ptr; - MVT VT; + EVT VT; if (LoadSDNode *LD = dyn_cast(N)) { if (LD->isIndexed()) return false; @@ -4509,7 +4629,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Check #1. Preinc'ing a frame index would require copying the stack pointer // (plus the implicit offset) to a register to preinc anyway. - if (isa(BasePtr)) + if (isa(BasePtr) || isa(BasePtr)) return false; // Check #2. @@ -4548,9 +4668,11 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PreIndexedNodes; ++NodesCombined; - DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << "\nReplacing.4 "; + N->dump(&DAG); + errs() << "\nWith: "; + Result.getNode()->dump(&DAG); + errs() << '\n'); WorkListRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), @@ -4585,7 +4707,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { bool isLoad = true; SDValue Ptr; - MVT VT; + EVT VT; if (LoadSDNode *LD = dyn_cast(N)) { if (LD->isIndexed()) return false; @@ -4621,7 +4743,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { SDValue Offset; ISD::MemIndexedMode AM = ISD::UNINDEXED; if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { - if (Ptr == Offset) + if (Ptr == Offset && Op->getOpcode() == ISD::ADD) std::swap(BasePtr, Offset); if (Ptr != BasePtr) continue; @@ -4636,6 +4758,9 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // nor a successor of N. Otherwise, if Op is folded that would // create a cycle. + if (isa(BasePtr) || isa(BasePtr)) + continue; + // Check for #1. bool TryNext = false; for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(), @@ -4677,9 +4802,11 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; - DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << "\nReplacing.5 "; + N->dump(&DAG); + errs() << "\nWith: "; + Result.getNode()->dump(&DAG); + errs() << '\n'); WorkListRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), @@ -4708,57 +4835,14 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { return false; } -/// InferAlignment - If we can infer some alignment information from this -/// pointer, return it. -static unsigned InferAlignment(SDValue Ptr, SelectionDAG &DAG) { - // If this is a direct reference to a stack slot, use information about the - // stack slot's alignment. - int FrameIdx = 1 << 31; - int64_t FrameOffset = 0; - if (FrameIndexSDNode *FI = dyn_cast(Ptr)) { - FrameIdx = FI->getIndex(); - } else if (Ptr.getOpcode() == ISD::ADD && - isa(Ptr.getOperand(1)) && - isa(Ptr.getOperand(0))) { - FrameIdx = cast(Ptr.getOperand(0))->getIndex(); - FrameOffset = Ptr.getConstantOperandVal(1); - } - - if (FrameIdx != (1 << 31)) { - // FIXME: Handle FI+CST. - const MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo(); - if (MFI.isFixedObjectIndex(FrameIdx)) { - int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset; - - // The alignment of the frame index can be determined from its offset from - // the incoming frame position. If the frame object is at offset 32 and - // the stack is guaranteed to be 16-byte aligned, then we know that the - // object is 16-byte aligned. - unsigned StackAlign = DAG.getTarget().getFrameInfo()->getStackAlignment(); - unsigned Align = MinAlign(ObjectOffset, StackAlign); - - // Finally, the frame object itself may have a known alignment. Factor - // the alignment + offset into a new alignment. For example, if we know - // the FI is 8 byte aligned, but the pointer is 4 off, we really have a - // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte - // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc. - unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), - FrameOffset); - return std::max(Align, FIInfoAlign); - } - } - - return 0; -} - SDValue DAGCombiner::visitLOAD(SDNode *N) { LoadSDNode *LD = cast(N); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); // Try to infer better alignment information than the load already has. - if (!Fast && LD->isUnindexed()) { - if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getAlignment()) return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), LD->getValueType(0), @@ -4781,9 +4865,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // v3 = add v2, c // Now we replace use of chain2 with chain1. This makes the second load // isomorphic to the one we are deleting, and thus makes this load live. - DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith chain: "; DEBUG(Chain.getNode()->dump(&DAG)); - DOUT << "\n"; + DEBUG(errs() << "\nReplacing.6 "; + N->dump(&DAG); + errs() << "\nWith chain: "; + Chain.getNode()->dump(&DAG); + errs() << "\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes); @@ -4799,9 +4885,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); - DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(Undef.getNode()->dump(&DAG)); - DOUT << " and 2 other values\n"; + DEBUG(errs() << "\nReplacing.6 "; + N->dump(&DAG); + errs() << "\nWith: "; + Undef.getNode()->dump(&DAG); + errs() << " and 2 other values\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), @@ -4856,7 +4944,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Create token factor to keep old chain connected. SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, Chain, ReplLoad.getValue(1)); - + + // Make sure the new and old chains are cleaned up. + AddToWorkList(Token.getNode()); + // Replace uses with load result and token factor. Don't add users // to work list. return CombineTo(N, ReplLoad.getValue(0), Token, false); @@ -4870,6 +4961,104 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { return SDValue(); } + +/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is +/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some +/// of the loaded bits, try narrowing the load and store if it would end up +/// being a win for performance or code size. +SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { + StoreSDNode *ST = cast(N); + if (ST->isVolatile()) + return SDValue(); + + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + SDValue Ptr = ST->getBasePtr(); + EVT VT = Value.getValueType(); + + if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) + return SDValue(); + + unsigned Opc = Value.getOpcode(); + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || + Value.getOperand(1).getOpcode() != ISD::Constant) + return SDValue(); + + SDValue N0 = Value.getOperand(0); + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LD = cast(N0); + if (LD->getBasePtr() != Ptr) + return SDValue(); + + // Find the type to narrow it the load / op / store to. + SDValue N1 = Value.getOperand(1); + unsigned BitWidth = N1.getValueSizeInBits(); + APInt Imm = cast(N1)->getAPIntValue(); + if (Opc == ISD::AND) + Imm ^= APInt::getAllOnesValue(BitWidth); + if (Imm == 0 || Imm.isAllOnesValue()) + return SDValue(); + unsigned ShAmt = Imm.countTrailingZeros(); + unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; + unsigned NewBW = NextPowerOf2(MSB - ShAmt); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); + while (NewBW < BitWidth && + !(TLI.isOperationLegalOrCustom(Opc, NewVT) && + TLI.isNarrowingProfitable(VT, NewVT))) { + NewBW = NextPowerOf2(NewBW); + NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); + } + if (NewBW >= BitWidth) + return SDValue(); + + // If the lsb changed does not start at the type bitwidth boundary, + // start at the previous one. + if (ShAmt % NewBW) + ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; + APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW); + if ((Imm & Mask) == Imm) { + APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); + if (Opc == ISD::AND) + NewImm ^= APInt::getAllOnesValue(NewBW); + uint64_t PtrOff = ShAmt / 8; + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. + if (TLI.isBigEndian()) + PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; + + unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); + if (NewAlign < + TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext()))) + return SDValue(); + + SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), + Ptr.getValueType(), Ptr, + DAG.getConstant(PtrOff, Ptr.getValueType())); + SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), + LD->getChain(), NewPtr, + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->isVolatile(), NewAlign); + SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, + DAG.getConstant(NewImm, NewVT)); + SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), + NewVal, NewPtr, + ST->getSrcValue(), ST->getSrcValueOffset(), + false, NewAlign); + + AddToWorkList(NewPtr.getNode()); + AddToWorkList(NewLD.getNode()); + AddToWorkList(NewVal.getNode()); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1), + &DeadNodes); + ++OpsNarrowed; + return NewST; + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast(N); SDValue Chain = ST->getChain(); @@ -4877,8 +5066,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { SDValue Ptr = ST->getBasePtr(); // Try to infer better alignment information than the store already has. - if (!Fast && ST->isUnindexed()) { - if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment()) return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(), @@ -4892,9 +5081,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && ST->isUnindexed()) { unsigned OrigAlign = ST->getAlignment(); - MVT SVT = Value.getOperand(0).getValueType(); + EVT SVT = Value.getOperand(0).getValueType(); unsigned Align = TLI.getTargetData()-> - getABITypeAlignment(SVT.getTypeForMVT()); + getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); if (Align <= OrigAlign && ((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) @@ -4911,8 +5100,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // transform should not be done in this case. if (Value.getOpcode() != ISD::TargetConstantFP) { SDValue Tmp; - switch (CFP->getValueType(0).getSimpleVT()) { - default: assert(0 && "Unknown FP type"); + switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unknown FP type"); case MVT::f80: // We don't do this for these yet. case MVT::f128: case MVT::ppcf128: @@ -4979,8 +5168,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // If there is a better chain. if (Chain != BetterChain) { - // Replace the chain to avoid dependency. SDValue ReplStore; + + // Replace the chain to avoid dependency. if (ST->isTruncatingStore()) { ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(),ST->getSrcValueOffset(), @@ -4996,6 +5186,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, Chain, ReplStore); + // Make sure the new and old chains are cleaned up. + AddToWorkList(Token.getNode()); + // Don't add users to work list. return CombineTo(N, Token, false); } @@ -5026,7 +5219,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // SimplifyDemandedBits, which only works if the value has a single use. if (SimplifyDemandedBits(Value, APInt::getLowBitsSet( - Value.getValueSizeInBits(), + Value.getValueType().getScalarType().getSizeInBits(), ST->getMemoryVT().getSizeInBits()))) return SDValue(N, 0); } @@ -5056,7 +5249,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->isVolatile(), ST->getAlignment()); } - return SDValue(); + return ReduceLoadOpStoreWidth(N); } SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { @@ -5075,7 +5268,21 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), InVec.getValueType(), &Ops[0], Ops.size()); } + // If the invec is an UNDEF and if EltNo is a constant, create a new + // BUILD_VECTOR with undef elements and the inserted element. + if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && + isa(EltNo)) { + EVT VT = InVec.getValueType(); + EVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + SmallVector Ops(NElts, DAG.getUNDEF(EltVT)); + unsigned Elt = cast(EltNo)->getZExtValue(); + if (Elt < Ops.size()) + Ops[Elt] = InVal; + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + InVec.getValueType(), &Ops[0], Ops.size()); + } return SDValue(); } @@ -5083,8 +5290,15 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); - if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) - return InVec.getOperand(0); + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // If the operand is wider than the vector element type then it is implicitly + // truncated. Make that explicit here. + EVT EltVT = InVec.getValueType().getVectorElementType(); + SDValue InOp = InVec.getOperand(0); + if (InOp.getValueType() != EltVT) + return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp); + return InOp; + } // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. @@ -5099,29 +5313,30 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { unsigned Elt = cast(EltNo)->getZExtValue(); bool NewLoad = false; bool BCNumEltsChanged = false; - MVT VT = InVec.getValueType(); - MVT EVT = VT.getVectorElementType(); - MVT LVT = EVT; + EVT VT = InVec.getValueType(); + EVT ExtVT = VT.getVectorElementType(); + EVT LVT = ExtVT; if (InVec.getOpcode() == ISD::BIT_CONVERT) { - MVT BCVT = InVec.getOperand(0).getValueType(); - if (!BCVT.isVector() || EVT.bitsGT(BCVT.getVectorElementType())) + EVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) BCNumEltsChanged = true; InVec = InVec.getOperand(0); - EVT = BCVT.getVectorElementType(); + ExtVT = BCVT.getVectorElementType(); NewLoad = true; } LoadSDNode *LN0 = NULL; + const ShuffleVectorSDNode *SVN = NULL; if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast(InVec); } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && - InVec.getOperand(0).getValueType() == EVT && + InVec.getOperand(0).getValueType() == ExtVT && ISD::isNormalLoad(InVec.getOperand(0).getNode())) { LN0 = cast(InVec.getOperand(0)); - } else if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE) { + } else if ((SVN = dyn_cast(InVec))) { // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) // => // (load $addr+1*size) @@ -5130,15 +5345,17 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // to examine the mask. if (BCNumEltsChanged) return SDValue(); - unsigned Idx = cast(InVec.getOperand(2). - getOperand(Elt))->getZExtValue(); - unsigned NumElems = InVec.getOperand(2).getNumOperands(); - InVec = (Idx < NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); + + // Select the input vector, guarding against out of range extract vector. + unsigned NumElems = VT.getVectorNumElements(); + int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt); + InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); + if (InVec.getOpcode() == ISD::BIT_CONVERT) InVec = InVec.getOperand(0); if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast(InVec); - Elt = (Idx < NumElems) ? Idx : Idx - NumElems; + Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems; } } @@ -5150,7 +5367,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Check the resultant load doesn't need a higher alignment than the // original load. unsigned NewAlign = - TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForMVT()); + TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) return SDValue(); @@ -5161,7 +5378,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue NewPtr = LN0->getBasePtr(); if (Elt) { unsigned PtrOff = LVT.getSizeInBits() * Elt / 8; - MVT PtrType = NewPtr.getValueType(); + EVT PtrType = NewPtr.getValueType(); if (TLI.isBigEndian()) PtrOff = VT.getSizeInBits() / 8 - PtrOff; NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr, @@ -5178,9 +5395,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); - MVT VT = N->getValueType(0); - unsigned NumElts = VT.getVectorNumElements(); - MVT EltType = VT.getVectorElementType(); + EVT VT = N->getValueType(0); // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from @@ -5222,56 +5437,40 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } // If everything is good, we can make a shuffle operation. - MVT IndexVT = MVT::i32; if (VecIn1.getNode()) { - SmallVector BuildVecIndices; + SmallVector Mask; for (unsigned i = 0; i != NumInScalars; ++i) { if (N->getOperand(i).getOpcode() == ISD::UNDEF) { - BuildVecIndices.push_back(DAG.getUNDEF(IndexVT)); + Mask.push_back(-1); continue; } - SDValue Extract = N->getOperand(i); - // If extracting from the first vector, just use the index directly. + SDValue Extract = N->getOperand(i); SDValue ExtVal = Extract.getOperand(1); if (Extract.getOperand(0) == VecIn1) { - if (ExtVal.getValueType() == IndexVT) - BuildVecIndices.push_back(ExtVal); - else { - unsigned Idx = cast(ExtVal)->getZExtValue(); - BuildVecIndices.push_back(DAG.getConstant(Idx, IndexVT)); - } + unsigned ExtIndex = cast(ExtVal)->getZExtValue(); + if (ExtIndex > VT.getVectorNumElements()) + return SDValue(); + + Mask.push_back(ExtIndex); continue; } // Otherwise, use InIdx + VecSize unsigned Idx = cast(ExtVal)->getZExtValue(); - BuildVecIndices.push_back(DAG.getConstant(Idx+NumInScalars, IndexVT)); + Mask.push_back(Idx+NumInScalars); } // Add count and size info. - MVT BuildVecVT = MVT::getVectorVT(IndexVT, NumElts); - if (!TLI.isTypeLegal(BuildVecVT) && LegalTypes) + if (!TLI.isTypeLegal(VT) && LegalTypes) return SDValue(); // Return the new VECTOR_SHUFFLE node. - SDValue Ops[5]; + SDValue Ops[2]; Ops[0] = VecIn1; - if (VecIn2.getNode()) { - Ops[1] = VecIn2; - } else { - // Use an undef build_vector as input for the second operand. - std::vector UnOps(NumInScalars, - DAG.getUNDEF(EltType)); - Ops[1] = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, - &UnOps[0], UnOps.size()); - AddToWorkList(Ops[1].getNode()); - } - - Ops[2] = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), BuildVecVT, - &BuildVecIndices[0], BuildVecIndices.size()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(), VT, Ops, 3); + Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); } return SDValue(); @@ -5291,69 +5490,23 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { - SDValue ShufMask = N->getOperand(2); - unsigned NumElts = ShufMask.getNumOperands(); + return SDValue(); + + EVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); assert(N0.getValueType().getVectorNumElements() == NumElts && "Vector shuffle must be normalized in DAG"); - // If the shuffle mask is an identity operation on the LHS, return the LHS. - bool isIdentity = true; - for (unsigned i = 0; i != NumElts; ++i) { - if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF && - cast(ShufMask.getOperand(i))->getZExtValue() != i) { - isIdentity = false; - break; - } - } - if (isIdentity) return N->getOperand(0); - - // If the shuffle mask is an identity operation on the RHS, return the RHS. - isIdentity = true; - for (unsigned i = 0; i != NumElts; ++i) { - if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF && - cast(ShufMask.getOperand(i))->getZExtValue() != - i+NumElts) { - isIdentity = false; - break; - } - } - if (isIdentity) return N->getOperand(1); - - // Check if the shuffle is a unary shuffle, i.e. one of the vectors is not - // needed at all. - bool isUnary = true; - bool isSplat = true; - int VecNum = -1; - unsigned BaseIdx = 0; - for (unsigned i = 0; i != NumElts; ++i) - if (ShufMask.getOperand(i).getOpcode() != ISD::UNDEF) { - unsigned Idx=cast(ShufMask.getOperand(i))->getZExtValue(); - int V = (Idx < NumElts) ? 0 : 1; - if (VecNum == -1) { - VecNum = V; - BaseIdx = Idx; - } else { - if (BaseIdx != Idx) - isSplat = false; - if (VecNum != V) { - isUnary = false; - break; - } - } - } - - // Normalize unary shuffle so the RHS is undef. - if (isUnary && VecNum == 1) - std::swap(N0, N1); + // FIXME: implement canonicalizations from DAG.getVectorShuffle() // If it is a splat, check if the argument vector is a build_vector with // all scalar elements the same. - if (isSplat) { + if (cast(N)->isSplat()) { SDNode *V = N0.getNode(); + // If this is a bit convert that changes the element type of the vector but // not the number of vector elements, look through it. Be careful not to @@ -5367,6 +5520,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (V->getOpcode() == ISD::BUILD_VECTOR) { unsigned NumElems = V->getNumOperands(); + unsigned BaseIdx = cast(N)->getSplatIndex(); if (NumElems > BaseIdx) { SDValue Base; bool AllSame = true; @@ -5391,38 +5545,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } } - - // If it is a unary or the LHS and the RHS are the same node, turn the RHS - // into an undef. - if (isUnary || N0 == N1) { - // Check the SHUFFLE mask, mapping any inputs from the 2nd operand into the - // first operand. - SmallVector MappedOps; - - for (unsigned i = 0; i != NumElts; ++i) { - if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF || - cast(ShufMask.getOperand(i))->getZExtValue() < - NumElts) { - MappedOps.push_back(ShufMask.getOperand(i)); - } else { - unsigned NewIdx = - cast(ShufMask.getOperand(i))->getZExtValue() - - NumElts; - MappedOps.push_back(DAG.getConstant(NewIdx, - ShufMask.getOperand(i).getValueType())); - } - } - - ShufMask = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - ShufMask.getValueType(), - &MappedOps[0], MappedOps.size()); - AddToWorkList(ShufMask.getNode()); - return DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(), - N->getValueType(0), N0, - DAG.getUNDEF(N->getValueType(0)), - ShufMask); - } - return SDValue(); } @@ -5431,52 +5553,42 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); if (N->getOpcode() == ISD::AND) { if (RHS.getOpcode() == ISD::BIT_CONVERT) RHS = RHS.getOperand(0); if (RHS.getOpcode() == ISD::BUILD_VECTOR) { - std::vector IdxOps; - unsigned NumOps = RHS.getNumOperands(); - unsigned NumElts = NumOps; + SmallVector Indices; + unsigned NumElts = RHS.getNumOperands(); for (unsigned i = 0; i != NumElts; ++i) { SDValue Elt = RHS.getOperand(i); if (!isa(Elt)) return SDValue(); else if (cast(Elt)->isAllOnesValue()) - IdxOps.push_back(DAG.getIntPtrConstant(i)); + Indices.push_back(i); else if (cast(Elt)->isNullValue()) - IdxOps.push_back(DAG.getIntPtrConstant(NumElts)); + Indices.push_back(NumElts); else return SDValue(); } // Let's see if the target supports this vector_shuffle. - if (!TLI.isVectorClearMaskLegal(IdxOps, TLI.getPointerTy(), DAG)) + EVT RVT = RHS.getValueType(); + if (!TLI.isVectorClearMaskLegal(Indices, RVT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. - MVT EVT = RHS.getValueType().getVectorElementType(); - MVT VT = MVT::getVectorVT(EVT, NumElts); - MVT MaskVT = MVT::getVectorVT(TLI.getPointerTy(), NumElts); - std::vector Ops; - LHS = DAG.getNode(ISD::BIT_CONVERT, LHS.getDebugLoc(), VT, LHS); - Ops.push_back(LHS); - AddToWorkList(LHS.getNode()); - std::vector ZeroOps(NumElts, DAG.getConstant(0, EVT)); - Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - VT, &ZeroOps[0], ZeroOps.size())); - Ops.push_back(DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - MaskVT, &IdxOps[0], IdxOps.size())); - SDValue Result = DAG.getNode(ISD::VECTOR_SHUFFLE, N->getDebugLoc(), - VT, &Ops[0], Ops.size()); - - if (VT != N->getValueType(0)) - Result = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), - N->getValueType(0), Result); - - return Result; + EVT EltVT = RVT.getVectorElementType(); + SmallVector ZeroOps(RVT.getVectorNumElements(), + DAG.getConstant(0, EltVT)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + RVT, &ZeroOps[0], ZeroOps.size()); + LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS); + SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf); } } @@ -5490,10 +5602,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // things. Simplifying them may result in a loss of legality. if (LegalOperations) return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); - MVT EltType = VT.getVectorElementType(); + EVT EltType = VT.getVectorElementType(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Shuffle = XformToShuffleWithZero(N); @@ -5536,7 +5648,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } if (Ops.size() == LHS.getNumOperands()) { - MVT VT = LHS.getValueType(); + EVT VT = LHS.getValueType(); return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Ops[0], Ops.size()); } @@ -5600,15 +5712,17 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // If this is an EXTLOAD, the VT's must match. if (LLD->getMemoryVT() == RLD->getMemoryVT()) { - // FIXME: this conflates two src values, discarding one. This is not - // the right thing to do, but nothing uses srcvalues now. When they do, - // turn SrcValue into a list of locations. + // FIXME: this discards src value information. This is + // over-conservative. It would be beneficial to be able to remember + // both potential memory locations. SDValue Addr; if (TheSelect->getOpcode() == ISD::SELECT) { // Check that the condition doesn't reach either load. If so, folding // this will induce a cycle into the DAG. - if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && - !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) { + if ((!LLD->hasAnyUseOfValue(1) || + !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) && + (!RLD->hasAnyUseOfValue(1) || + !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) { Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), LLD->getBasePtr().getValueType(), TheSelect->getOperand(0), LLD->getBasePtr(), @@ -5617,10 +5731,12 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, } else { // Check that the condition doesn't reach either load. If so, folding // this will induce a cycle into the DAG. - if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && - !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && - !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()) && - !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())) { + if ((!LLD->hasAnyUseOfValue(1) || + (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) && + (!RLD->hasAnyUseOfValue(1) || + (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) { Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), LLD->getBasePtr().getValueType(), TheSelect->getOperand(0), @@ -5636,16 +5752,14 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, Load = DAG.getLoad(TheSelect->getValueType(0), TheSelect->getDebugLoc(), LLD->getChain(), - Addr,LLD->getSrcValue(), - LLD->getSrcValueOffset(), + Addr, 0, 0, LLD->isVolatile(), LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType(), TheSelect->getDebugLoc(), TheSelect->getValueType(0), - LLD->getChain(), Addr, LLD->getSrcValue(), - LLD->getSrcValueOffset(), + LLD->getChain(), Addr, 0, 0, LLD->getMemoryVT(), LLD->isVolatile(), LLD->getAlignment()); @@ -5675,7 +5789,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // (x ? y : y) -> y. if (N2 == N3) return N2; - MVT VT = N2.getValueType(); + EVT VT = N2.getValueType(); ConstantSDNode *N1C = dyn_cast(N1.getNode()); ConstantSDNode *N2C = dyn_cast(N2.getNode()); ConstantSDNode *N3C = dyn_cast(N3.getNode()); @@ -5743,7 +5857,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // Get the offsets to the 0 and 1 element of the array so that we can // select between them. SDValue Zero = DAG.getIntPtrConstant(0); - unsigned EltSize = (unsigned)TD.getTypePaddedSize(Elts[0]->getType()); + unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); SDValue One = DAG.getIntPtrConstant(EltSize); SDValue Cond = DAG.getSetCC(DL, @@ -5767,8 +5881,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, N2.getValueType().isInteger() && (N1C->isNullValue() || // (a < 0) ? b : 0 (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 - MVT XType = N0.getValueType(); - MVT AType = N2.getValueType(); + EVT XType = N0.getValueType(); + EVT AType = N2.getValueType(); if (XType.bitsGE(AType)) { // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a // single-bit constant. @@ -5847,7 +5961,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // FIXME: Turn all of these into setcc if setcc if setcc is legal // otherwise, go ahead with the folds. if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { - MVT XType = N0.getValueType(); + EVT XType = N0.getValueType(); if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) { SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC); @@ -5889,7 +6003,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) && N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) && N2.getOperand(0) == N1 && N0.getValueType().isInteger()) { - MVT XType = N0.getValueType(); + EVT XType = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy())); @@ -5904,7 +6018,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT && N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) { if (ConstantSDNode *SubC = dyn_cast(N3.getOperand(0))) { - MVT XType = N0.getValueType(); + EVT XType = N0.getValueType(); if (SubC->isNullValue() && XType.isInteger()) { SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, @@ -5923,11 +6037,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, } /// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. -SDValue DAGCombiner::SimplifySetCC(MVT VT, SDValue N0, +SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, DebugLoc DL, bool foldBooleans) { TargetLowering::DAGCombinerInfo - DagCombineInfo(DAG, Level == Unrestricted, false, this); + DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); } @@ -5959,11 +6073,12 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return S; } -/// FindBaseOffset - Return true if base is known not to alias with anything -/// but itself. Provides base object and offset as results. -static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) { +/// FindBaseOffset - Return true if base is a frame index, which is known not +// to alias with anything but itself. Provides base object and offset as results. +static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, + GlobalValue *&GV, void *&CV) { // Assume it is a primitive operation. - Base = Ptr; Offset = 0; + Base = Ptr; Offset = 0; GV = 0; CV = 0; // If it's an adding a simple constant then integrate the offset. if (Base.getOpcode() == ISD::ADD) { @@ -5972,36 +6087,73 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) { Offset += C->getZExtValue(); } } + + // Return the underlying GlobalValue, and update the Offset. Return false + // for GlobalAddressSDNode since the same GlobalAddress may be represented + // by multiple nodes with different offsets. + if (GlobalAddressSDNode *G = dyn_cast(Base)) { + GV = G->getGlobal(); + Offset += G->getOffset(); + return false; + } + // Return the underlying Constant value, and update the Offset. Return false + // for ConstantSDNodes since the same constant pool entry may be represented + // by multiple nodes with different offsets. + if (ConstantPoolSDNode *C = dyn_cast(Base)) { + CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal() + : (void *)C->getConstVal(); + Offset += C->getOffset(); + return false; + } // If it's any of the following then it can't alias with anything but itself. - return isa(Base) || - isa(Base) || - isa(Base); + return isa(Base); } /// isAlias - Return true if there is any possibility that the two addresses /// overlap. bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, const Value *SrcValue1, int SrcValueOffset1, + unsigned SrcValueAlign1, SDValue Ptr2, int64_t Size2, - const Value *SrcValue2, int SrcValueOffset2) const { + const Value *SrcValue2, int SrcValueOffset2, + unsigned SrcValueAlign2) const { // If they are the same then they must be aliases. if (Ptr1 == Ptr2) return true; // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; - bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1); - bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2); + GlobalValue *GV1, *GV2; + void *CV1, *CV2; + bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); + bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); - // If they have a same base address then... - if (Base1 == Base2) - // Check to see if the addresses overlap. + // If they have a same base address then check to see if they overlap. + if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); - // If we know both bases then they can't alias. - if (KnownBase1 && KnownBase2) return false; + // If we know what the bases are, and they aren't identical, then we know they + // cannot alias. + if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) + return false; + // If we know required SrcValue1 and SrcValue2 have relatively large alignment + // compared to the size and offset of the access, we may be able to prove they + // do not alias. This check is conservative for now to catch cases created by + // splitting vector types. + if ((SrcValueAlign1 == SrcValueAlign2) && + (SrcValueOffset1 != SrcValueOffset2) && + (Size1 == Size2) && (SrcValueAlign1 > Size1)) { + int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1; + int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1; + + // There is no overlap between these relatively aligned accesses of similar + // size, return no alias. + if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1) + return false; + } + if (CombinerGlobalAA) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); @@ -6021,20 +6173,24 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, /// node. Returns true if the operand was a load. bool DAGCombiner::FindAliasInfo(SDNode *N, SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, int &SrcValueOffset) const { + const Value *&SrcValue, + int &SrcValueOffset, + unsigned &SrcValueAlign) const { if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); Size = LD->getMemoryVT().getSizeInBits() >> 3; SrcValue = LD->getSrcValue(); SrcValueOffset = LD->getSrcValueOffset(); + SrcValueAlign = LD->getOriginalAlignment(); return true; } else if (StoreSDNode *ST = dyn_cast(N)) { Ptr = ST->getBasePtr(); Size = ST->getMemoryVT().getSizeInBits() >> 3; SrcValue = ST->getSrcValue(); SrcValueOffset = ST->getSrcValueOffset(); + SrcValueAlign = ST->getOriginalAlignment(); } else { - assert(0 && "FindAliasInfo expected a memory operand"); + llvm_unreachable("FindAliasInfo expected a memory operand"); } return false; @@ -6045,28 +6201,45 @@ bool DAGCombiner::FindAliasInfo(SDNode *N, void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVector &Aliases) { SmallVector Chains; // List of chains to visit. - std::set Visited; // Visited node set. + SmallPtrSet Visited; // Visited node set. // Get alias information for node. SDValue Ptr; int64_t Size; const Value *SrcValue; int SrcValueOffset; - bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset); + unsigned SrcValueAlign; + bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, + SrcValueAlign); // Starting off. Chains.push_back(OriginalChain); - + unsigned Depth = 0; + // Look at each chain and determine if it is an alias. If so, add it to the // aliases list. If not, then continue up the chain looking for the next // candidate. while (!Chains.empty()) { SDValue Chain = Chains.back(); Chains.pop_back(); + + // For TokenFactor nodes, look at each operand and only continue up the + // chain until we find two aliases. If we've seen two aliases, assume we'll + // find more and revert to original chain since the xform is unlikely to be + // profitable. + // + // FIXME: The depth check could be made to return the last non-aliasing + // chain we found before we hit a tokenfactor rather than the original + // chain. + if (Depth > 6 || Aliases.size() == 2) { + Aliases.clear(); + Aliases.push_back(OriginalChain); + break; + } - // Don't bother if we've been before. - if (Visited.find(Chain.getNode()) != Visited.end()) continue; - Visited.insert(Chain.getNode()); + // Don't bother if we've been before. + if (!Visited.insert(Chain.getNode())) + continue; switch (Chain.getOpcode()) { case ISD::EntryToken: @@ -6080,32 +6253,37 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, int64_t OpSize; const Value *OpSrcValue; int OpSrcValueOffset; + unsigned OpSrcValueAlign; bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, - OpSrcValue, OpSrcValueOffset); + OpSrcValue, OpSrcValueOffset, + OpSrcValueAlign); // If chain is alias then stop here. if (!(IsLoad && IsOpLoad) && - isAlias(Ptr, Size, SrcValue, SrcValueOffset, - OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) { + isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, + OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, + OpSrcValueAlign)) { Aliases.push_back(Chain); } else { // Look further up the chain. Chains.push_back(Chain.getOperand(0)); - // Clean up old chain. - AddToWorkList(Chain.getNode()); + ++Depth; } break; } case ISD::TokenFactor: - // We have to check each of the operands of the token factor, so we queue - // then up. Adding the operands to the queue (stack) in reverse order - // maintains the original order and increases the likelihood that getNode - // will find a matching token factor (CSE.) + // We have to check each of the operands of the token factor for "small" + // token factors, so we queue them up. Adding the operands to the queue + // (stack) in reverse order maintains the original order and increases the + // likelihood that getNode will find a matching token factor (CSE.) + if (Chain.getNumOperands() > 16) { + Aliases.push_back(Chain); + break; + } for (unsigned n = Chain.getNumOperands(); n;) Chains.push_back(Chain.getOperand(--n)); - // Eliminate the token factor if we can. - AddToWorkList(Chain.getNode()); + ++Depth; break; default: @@ -6131,21 +6309,17 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { // If a single operand then chain to it. We don't need to revisit it. return Aliases[0]; } - + // Construct a custom tailored token factor. - SDValue NewChain = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, - &Aliases[0], Aliases.size()); - - // Make sure the old chain gets cleaned up. - if (NewChain != OldChain) AddToWorkList(OldChain.getNode()); - - return NewChain; + return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + &Aliases[0], Aliases.size()); } // SelectionDAG::Combine - This is the entry point for the file. // -void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, bool Fast) { +void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, + CodeGenOpt::Level OptLevel) { /// run - This is the main entry point to this class. /// - DAGCombiner(*this, AA, Fast).Run(Level); + DAGCombiner(*this, AA, OptLevel).Run(Level); }