From: Dan Gohman Date: Fri, 18 Jun 2010 01:05:21 +0000 (+0000) Subject: Fold the ShrinkDemandedOps pass into the regular DAGCombiner pass, X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=8a7f7426eeb18fef58c3471db23fc829b67bc350;p=oota-llvm.git Fold the ShrinkDemandedOps pass into the regular DAGCombiner pass, which is faster, simpler, and less surprising. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106263 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index 38175808ab4..f22ac902de9 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -292,7 +292,6 @@ private: MachineBasicBlock *CodeGenAndEmitDAG(MachineBasicBlock *BB); void LowerArguments(const BasicBlock *BB); - void ShrinkDemandedOps(); void ComputeLiveOutVRegInfo(); /// Create the scheduler. If a specific scheduler was specified diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 5efebe637e1..d2646fd09da 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -766,14 +766,12 @@ public: SelectionDAG &DAG; bool LegalTys; bool LegalOps; - bool ShrinkOps; SDValue Old; SDValue New; explicit TargetLoweringOpt(SelectionDAG &InDAG, - bool LT, bool LO, - bool Shrink = false) : - DAG(InDAG), LegalTys(LT), LegalOps(LO), ShrinkOps(Shrink) {} + bool LT, bool LO) : + DAG(InDAG), LegalTys(LT), LegalOps(LO) {} bool LegalTypes() const { return LegalTys; } bool LegalOperations() const { return LegalOps; } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6bddd784fe8..19b169ce444 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2028,7 +2028,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) // fold (OP 
(sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) - // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) // // do not sink logical op inside of a vector extend, since it may combine // into a vsetcc. @@ -2038,7 +2038,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // Avoid infinite looping with PromoteIntBinOp. (N0.getOpcode() == ISD::ANY_EXTEND && (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || - (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) && + (N0.getOpcode() == ISD::TRUNCATE && + (!TLI.isZExtFree(VT, Op0VT) || + !TLI.isTruncateFree(Op0VT, VT)) && + TLI.isTypeLegal(Op0VT))) && !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { @@ -2425,6 +2428,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc())) return SDValue(Rot, 0); + // Simplify the operands using demanded-bits information. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -3158,6 +3166,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return NewSRL; } + // Attempt to convert a srl of a load into a narrower zero-extending load. + SDValue NarrowLoad = ReduceLoadWidth(N); + if (NarrowLoad.getNode()) + return NarrowLoad; + // Here is a common situation. We want to optimize: // // %a = ... 
@@ -3635,10 +3648,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE && - (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) && - (!TLI.isTruncateFree(N0.getOperand(0).getValueType(), - N0.getValueType()) || - !TLI.isZExtFree(N0.getValueType(), VT))) { + (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); @@ -4024,6 +4034,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { /// extended, also fold the extension to form a extending load. SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); + ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4040,6 +4051,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ExtVT = cast(N->getOperand(1))->getVT(); if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT)) return SDValue(); + } else if (Opc == ISD::SRL) { + // Another special-case: SRL is basically zero-extending a narrower + // value. + ExtType = ISD::ZEXTLOAD; + N0 = SDValue(N, 0); + ConstantSDNode *N01 = dyn_cast(N0.getOperand(1)); + if (!N01) return SDValue(); + ExtVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() - N01->getZExtValue()); } unsigned EVTBits = ExtVT.getSizeInBits(); @@ -4243,8 +4263,17 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) - if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) - return ReduceLoadWidth(N); + if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { + SDValue Reduced = ReduceLoadWidth(N); + if (Reduced.getNode()) + return Reduced; + } + + // Simplify the operands using demanded-bits information. 
+ if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 49f31012a39..ca3746cdaf8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2474,10 +2474,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ANY_EXTEND) // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + + // (ext (trunc x)) -> x + if (OpOpcode == ISD::TRUNCATE) { + SDValue OpOp = Operand.getNode()->getOperand(0); + if (OpOp.getValueType() == VT) + return OpOp; + } break; case ISD::TRUNCATE: assert(VT.isInteger() && Operand.getValueType().isInteger() && diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 2ee5f0b4bfe..48b4d896bc4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -372,102 +372,6 @@ public: }; } -/// TrivialTruncElim - Eliminate some trivial nops that can result from -/// ShrinkDemandedOps: (trunc (ext n)) -> n. -static bool TrivialTruncElim(SDValue Op, - TargetLowering::TargetLoweringOpt &TLO) { - SDValue N0 = Op.getOperand(0); - EVT VT = Op.getValueType(); - if ((N0.getOpcode() == ISD::ZERO_EXTEND || - N0.getOpcode() == ISD::SIGN_EXTEND || - N0.getOpcode() == ISD::ANY_EXTEND) && - N0.getOperand(0).getValueType() == VT) { - return TLO.CombineTo(Op, N0.getOperand(0)); - } - return false; -} - -/// ShrinkDemandedOps - A late transformation pass that shrink expressions -/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. 
It converts -/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. -void SelectionDAGISel::ShrinkDemandedOps() { - SmallVector Worklist; - SmallPtrSet InWorklist; - - // Add all the dag nodes to the worklist. - Worklist.reserve(CurDAG->allnodes_size()); - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - Worklist.push_back(I); - InWorklist.insert(I); - } - - TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true); - while (!Worklist.empty()) { - SDNode *N = Worklist.pop_back_val(); - InWorklist.erase(N); - - if (N->use_empty() && N != CurDAG->getRoot().getNode()) { - // Deleting this node may make its operands dead, add them to the worklist - // if they aren't already there. - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (InWorklist.insert(N->getOperand(i).getNode())) - Worklist.push_back(N->getOperand(i).getNode()); - - CurDAG->DeleteNode(N); - continue; - } - - // Run ShrinkDemandedOp on scalar binary operations. - if (N->getNumValues() != 1 || - !N->getValueType(0).isSimple() || !N->getValueType(0).isInteger()) - continue; - - unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); - APInt Demanded = APInt::getAllOnesValue(BitWidth); - APInt KnownZero, KnownOne; - if (!TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded, - KnownZero, KnownOne, TLO) && - (N->getOpcode() != ISD::TRUNCATE || - !TrivialTruncElim(SDValue(N, 0), TLO))) - continue; - - // Revisit the node. - assert(!InWorklist.count(N) && "Already in worklist"); - Worklist.push_back(N); - InWorklist.insert(N); - - // Replace the old value with the new one. 
- DEBUG(errs() << "\nShrinkDemandedOps replacing "; - TLO.Old.getNode()->dump(CurDAG); - errs() << "\nWith: "; - TLO.New.getNode()->dump(CurDAG); - errs() << '\n'); - - if (InWorklist.insert(TLO.New.getNode())) - Worklist.push_back(TLO.New.getNode()); - - SDOPsWorkListRemover DeadNodes(Worklist, InWorklist); - CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); - - if (!TLO.Old.getNode()->use_empty()) continue; - - for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); - i != e; ++i) { - SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); - if (OpNode->hasOneUse()) { - // Add OpNode to the end of the list to revisit. - DeadNodes.RemoveFromWorklist(OpNode); - Worklist.push_back(OpNode); - InWorklist.insert(OpNode); - } - } - - DeadNodes.RemoveFromWorklist(TLO.Old.getNode()); - CurDAG->DeleteNode(TLO.Old.getNode()); - } -} - void SelectionDAGISel::ComputeLiveOutVRegInfo() { SmallPtrSet VisitedNodes; SmallVector Worklist; @@ -636,10 +540,8 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { DEBUG(dbgs() << "Optimized legalized selection DAG:\n"); DEBUG(CurDAG->dump()); - if (OptLevel != CodeGenOpt::None) { - ShrinkDemandedOps(); + if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); - } if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 1cca100d56f..730bfda2046 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1042,7 +1042,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. 
@@ -1076,7 +1076,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -1101,7 +1101,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((KnownZero2 & NewMask) == NewMask) return TLO.CombineTo(Op, Op.getOperand(1)); // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // If all of the unknown bits are known to be zero on one side or the other @@ -1548,7 +1548,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne2, TLO, Depth+1)) return true; // See if the operation should be performed at a smaller bit width. 
- if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; } // FALL THROUGH diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 99070668329..f5e0b225dbb 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9641,8 +9641,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast(Sum)) { - if (SumC->getSExtValue() == Bits && - ShAmt1.getOperand(1) == ShAmt0) + SDValue ShAmt1Op1 = ShAmt1.getOperand(1); + if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE) + ShAmt1Op1 = ShAmt1Op1.getOperand(0); + if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0) return DAG.getNode(Opc, DL, VT, Op0, Op1, DAG.getNode(ISD::TRUNCATE, DL, diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll index 872817fd495..48ca36ca981 100644 --- a/test/CodeGen/X86/shift-folding.ll +++ b/test/CodeGen/X86/shift-folding.ll @@ -21,3 +21,8 @@ define i32* @test3(i32* %P, i32 %X) { ret i32* %P2 } +define fastcc i32 @test4(i32* %d) nounwind { + %tmp4 = load i32* %d + %tmp512 = lshr i32 %tmp4, 24 + ret i32 %tmp512 +} diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll index b1100fa960c..5682e7caf8b 100644 --- a/test/CodeGen/X86/store-narrow.ll +++ b/test/CodeGen/X86/store-narrow.ll @@ -67,7 +67,7 @@ entry: ; X64: movw %si, 2(%rdi) ; X32: test4: -; X32: movzwl 8(%esp), %eax +; X32: movl 8(%esp), %eax ; X32: movw %ax, 2(%{{.*}}) }