From: Daniel Dunbar Date: Wed, 23 Jun 2010 17:09:26 +0000 (+0000) Subject: Revert r106263, "Fold the ShrinkDemandedOps pass into the regular DAGCombiner pass... X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=cbe762b5d165c565feb98b745e93b71d208a1e36;p=oota-llvm.git Revert r106263, "Fold the ShrinkDemandedOps pass into the regular DAGCombiner pass,"... it was causing both 'file' (with clang) and 176.gcc (with llvm-gcc) to be miscompiled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106634 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index f22ac902de9..38175808ab4 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -292,6 +292,7 @@ private: MachineBasicBlock *CodeGenAndEmitDAG(MachineBasicBlock *BB); void LowerArguments(const BasicBlock *BB); + void ShrinkDemandedOps(); void ComputeLiveOutVRegInfo(); /// Create the scheduler. If a specific scheduler was specified diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 7ba67e4c68d..b44fa71b90e 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -773,12 +773,14 @@ public: SelectionDAG &DAG; bool LegalTys; bool LegalOps; + bool ShrinkOps; SDValue Old; SDValue New; explicit TargetLoweringOpt(SelectionDAG &InDAG, - bool LT, bool LO) : - DAG(InDAG), LegalTys(LT), LegalOps(LO) {} + bool LT, bool LO, + bool Shrink = false) : + DAG(InDAG), LegalTys(LT), LegalOps(LO), ShrinkOps(Shrink) {} bool LegalTypes() const { return LegalTys; } bool LegalOperations() const { return LegalOps; } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a72415f9532..81a382b7e70 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2030,7 +2030,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) - // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) // // do not sink logical op inside of a vector extend, since it may combine // into a vsetcc. @@ -2040,10 +2040,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // Avoid infinite looping with PromoteIntBinOp. (N0.getOpcode() == ISD::ANY_EXTEND && (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || - (N0.getOpcode() == ISD::TRUNCATE && - (!TLI.isZExtFree(VT, Op0VT) || - !TLI.isTruncateFree(Op0VT, VT)) && - TLI.isTypeLegal(Op0VT))) && + (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) && !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { @@ -2430,11 +2427,6 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc())) return SDValue(Rot, 0); - // Simplify the operands using demanded-bits information. - if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) - return SDValue(N, 0); - return SDValue(); } @@ -3168,11 +3160,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return NewSRL; } - // Attempt to convert a srl of a load into a narrower zero-extending load. - SDValue NarrowLoad = ReduceLoadWidth(N); - if (NarrowLoad.getNode()) - return NarrowLoad; - // Here is a common situation. We want to optimize: // // %a = ... @@ -3650,7 +3637,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE && - (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { + (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) && + (!TLI.isTruncateFree(N0.getOperand(0).getValueType(), + N0.getValueType()) || + !TLI.isZExtFree(N0.getValueType(), VT))) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); @@ -4036,7 +4026,6 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { /// extended, also fold the extension to form a extending load. SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); - ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4053,15 +4042,6 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ExtVT = cast(N->getOperand(1))->getVT(); if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT)) return SDValue(); - } else if (Opc == ISD::SRL) { - // Annother special-case: SRL is basically zero-extending a narrower - // value. - ExtType = ISD::ZEXTLOAD; - N0 = SDValue(N, 0); - ConstantSDNode *N01 = dyn_cast(N0.getOperand(1)); - if (!N01) return SDValue(); - ExtVT = EVT::getIntegerVT(*DAG.getContext(), - VT.getSizeInBits() - N01->getZExtValue()); } unsigned EVTBits = ExtVT.getSizeInBits(); @@ -4265,17 +4245,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) - if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { - SDValue Reduced = ReduceLoadWidth(N); - if (Reduced.getNode()) - return Reduced; - } - - // Simplify the operands using demanded-bits information. - if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) - return SDValue(N, 0); - + if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) + return ReduceLoadWidth(N); return SDValue(); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 4cbfaed87fc..81970769bed 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2470,18 +2470,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); - if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ANY_EXTEND) // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); - - // (ext (trunx x)) -> x - if (OpOpcode == ISD::TRUNCATE) { - SDValue OpOp = Operand.getNode()->getOperand(0); - if (OpOp.getValueType() == VT) - return OpOp; - } break; case ISD::TRUNCATE: assert(VT.isInteger() && Operand.getValueType().isInteger() && diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 3b0ef18dd85..15c1ae5ff42 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -372,6 +372,102 @@ public: }; } +/// TrivialTruncElim - Eliminate some trivial nops that can result from +/// ShrinkDemandedOps: (trunc (ext n)) -> n. +static bool TrivialTruncElim(SDValue Op, + TargetLowering::TargetLoweringOpt &TLO) { + SDValue N0 = Op.getOperand(0); + EVT VT = Op.getValueType(); + if ((N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND || + N0.getOpcode() == ISD::ANY_EXTEND) && + N0.getOperand(0).getValueType() == VT) { + return TLO.CombineTo(Op, N0.getOperand(0)); + } + return false; +} + +/// ShrinkDemandedOps - A late transformation pass that shrink expressions +/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts +/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. +void SelectionDAGISel::ShrinkDemandedOps() { + SmallVector Worklist; + SmallPtrSet InWorklist; + + // Add all the dag nodes to the worklist. + Worklist.reserve(CurDAG->allnodes_size()); + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); I != E; ++I) { + Worklist.push_back(I); + InWorklist.insert(I); + } + + TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true); + while (!Worklist.empty()) { + SDNode *N = Worklist.pop_back_val(); + InWorklist.erase(N); + + if (N->use_empty() && N != CurDAG->getRoot().getNode()) { + // Deleting this node may make its operands dead, add them to the worklist + // if they aren't already there. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (InWorklist.insert(N->getOperand(i).getNode())) + Worklist.push_back(N->getOperand(i).getNode()); + + CurDAG->DeleteNode(N); + continue; + } + + // Run ShrinkDemandedOp on scalar binary operations. + if (N->getNumValues() != 1 || + !N->getValueType(0).isSimple() || !N->getValueType(0).isInteger()) + continue; + + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + APInt Demanded = APInt::getAllOnesValue(BitWidth); + APInt KnownZero, KnownOne; + if (!TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded, + KnownZero, KnownOne, TLO) && + (N->getOpcode() != ISD::TRUNCATE || + !TrivialTruncElim(SDValue(N, 0), TLO))) + continue; + + // Revisit the node. + assert(!InWorklist.count(N) && "Already in worklist"); + Worklist.push_back(N); + InWorklist.insert(N); + + // Replace the old value with the new one. + DEBUG(errs() << "\nShrinkDemandedOps replacing "; + TLO.Old.getNode()->dump(CurDAG); + errs() << "\nWith: "; + TLO.New.getNode()->dump(CurDAG); + errs() << '\n'); + + if (InWorklist.insert(TLO.New.getNode())) + Worklist.push_back(TLO.New.getNode()); + + SDOPsWorkListRemover DeadNodes(Worklist, InWorklist); + CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); + + if (!TLO.Old.getNode()->use_empty()) continue; + + for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); + i != e; ++i) { + SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); + if (OpNode->hasOneUse()) { + // Add OpNode to the end of the list to revisit. + DeadNodes.RemoveFromWorklist(OpNode); + Worklist.push_back(OpNode); + InWorklist.insert(OpNode); + } + } + + DeadNodes.RemoveFromWorklist(TLO.Old.getNode()); + CurDAG->DeleteNode(TLO.Old.getNode()); + } +} + void SelectionDAGISel::ComputeLiveOutVRegInfo() { SmallPtrSet VisitedNodes; SmallVector Worklist; @@ -521,8 +617,10 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump()); - if (OptLevel != CodeGenOpt::None) + if (OptLevel != CodeGenOpt::None) { + ShrinkDemandedOps(); ComputeLiveOutVRegInfo(); + } if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index bfd689b4d1e..06f29b8e950 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1075,7 +1075,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. @@ -1109,7 +1109,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -1134,7 +1134,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((KnownZero2 & NewMask) == NewMask) return TLO.CombineTo(Op, Op.getOperand(1)); // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // If all of the unknown bits are known to be zero on one side or the other @@ -1581,7 +1581,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne2, TLO, Depth+1)) return true; // See if the operation should be performed at a smaller bit width. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; } // FALL THROUGH diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index dd1c4a179b4..bd95c856293 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9627,10 +9627,8 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast(Sum)) { - SDValue ShAmt1Op1 = ShAmt1.getOperand(1); - if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE) - ShAmt1Op1 = ShAmt1Op1.getOperand(0); - if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0) + if (SumC->getSExtValue() == Bits && + ShAmt1.getOperand(1) == ShAmt0) return DAG.getNode(Opc, DL, VT, Op0, Op1, DAG.getNode(ISD::TRUNCATE, DL, diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll index 48ca36ca981..872817fd495 100644 --- a/test/CodeGen/X86/shift-folding.ll +++ b/test/CodeGen/X86/shift-folding.ll @@ -21,8 +21,3 @@ define i32* @test3(i32* %P, i32 %X) { ret i32* %P2 } -define fastcc i32 @test4(i32* %d) nounwind { - %tmp4 = load i32* %d - %tmp512 = lshr i32 %tmp4, 24 - ret i32 %tmp512 -} diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll index 5682e7caf8b..b1100fa960c 100644 --- a/test/CodeGen/X86/store-narrow.ll +++ b/test/CodeGen/X86/store-narrow.ll @@ -67,7 +67,7 @@ entry: ; X64: movw %si, 2(%rdi) ; X32: test4: -; X32: movl 8(%esp), %eax +; X32: movzwl 8(%esp), %eax ; X32: movw %ax, 2(%{{.*}}) }