From: Chris Lattner Date: Thu, 12 Mar 2009 06:52:53 +0000 (+0000) Subject: Move 3 "(add (select cc, 0, c), x) -> (select cc, x, (add, x, c))" X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=d1980a5acd8509ea34ee2dec5e13de5dbe16af2d;p=oota-llvm.git Move 3 "(add (select cc, 0, c), x) -> (select cc, x, (add, x, c))" related transformations out of target-specific dag combine into the ARM backend. These were added by Evan in r37685 with no testcases and only seems to help ARM (e.g. test/CodeGen/ARM/select_xform.ll). Add some simple X86-specific (for now) DAG combines that turn things like cond ? 8 : 0 -> (zext(cond) << 3). This happens frequently with the recently added cp constant select optimization, but is a very general xform. For example, we now compile the second example in const-select.ll to: _test: movsd LCPI2_0, %xmm0 ucomisd 8(%esp), %xmm0 seta %al movzbl %al, %eax movl 4(%esp), %ecx movsbl (%ecx,%eax,4), %eax ret instead of: _test: movl 4(%esp), %eax leal 4(%eax), %ecx movsd LCPI2_0, %xmm0 ucomisd 8(%esp), %xmm0 cmovbe %eax, %ecx movsbl (%ecx), %eax ret This passes multisource and dejagnu. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@66779 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 993092e7f29..671c85e6d78 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -543,8 +543,10 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, if (AddTo) { // Push the new nodes and any users onto the worklist for (unsigned i = 0, e = NumTo; i != e; ++i) { - AddToWorkList(To[i].getNode()); - AddUsersToWorkList(To[i].getNode()); + if (To[i].getNode()) { + AddToWorkList(To[i].getNode()); + AddUsersToWorkList(To[i].getNode()); + } } } @@ -944,65 +946,6 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, return SDValue(); } -static -SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, - SelectionDAG &DAG, const TargetLowering &TLI, - bool LegalOperations) { - MVT VT = N->getValueType(0); - unsigned Opc = N->getOpcode(); - bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; - SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); - SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2); - ISD::CondCode CC = ISD::SETCC_INVALID; - - if (isSlctCC) { - CC = cast(Slct.getOperand(4))->get(); - } else { - SDValue CCOp = Slct.getOperand(0); - if (CCOp.getOpcode() == ISD::SETCC) - CC = cast(CCOp.getOperand(2))->get(); - } - - bool DoXform = false; - bool InvCC = false; - assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && - "Bad input!"); - - if (LHS.getOpcode() == ISD::Constant && - cast(LHS)->isNullValue()) { - DoXform = true; - } else if (CC != ISD::SETCC_INVALID && - RHS.getOpcode() == ISD::Constant && - cast(RHS)->isNullValue()) { - std::swap(LHS, RHS); - SDValue Op0 = Slct.getOperand(0); - MVT OpVT = isSlctCC ? Op0.getValueType() : - Op0.getOperand(0).getValueType(); - bool isInt = OpVT.isInteger(); - CC = ISD::getSetCCInverse(CC, isInt); - - if (LegalOperations && !TLI.isCondCodeLegal(CC, OpVT)) - return SDValue(); // Inverse operator isn't legal. - - DoXform = true; - InvCC = true; - } - - if (DoXform) { - SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); - if (isSlctCC) - return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, - Slct.getOperand(0), Slct.getOperand(1), CC); - SDValue CCOp = Slct.getOperand(0); - if (InvCC) - CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), - CCOp.getOperand(0), CCOp.getOperand(1), CC); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, - CCOp, OtherOp, Result); - } - return SDValue(); -} - SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1123,16 +1066,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (Result.getNode()) return Result; } - // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) - if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { - SDValue Result = combineSelectAndUse(N, N0, N1, DAG, TLI, LegalOperations); - if (Result.getNode()) return Result; - } - if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { - SDValue Result = combineSelectAndUse(N, N1, N0, DAG, TLI, LegalOperations); - if (Result.getNode()) return Result; - } - return SDValue(); } @@ -1246,11 +1179,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { N0.getOperand(1).getOperand(1) == N1) return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); - // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) - if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { - SDValue Result = combineSelectAndUse(N, N1, N0, DAG, TLI, LegalOperations); - if (Result.getNode()) return Result; - } // If either operand of a sub is undef, the result is undef if (N0.getOpcode() == ISD::UNDEF) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index a6674b2916e..25dda0c4d67 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -256,7 +256,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // We have target-specific dag combine patterns for the following nodes: // ARMISD::FMRRD - No need to call setTargetDAGCombine - + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SUB); + + setStackPointerRegisterToSaveRestore(ARM::SP); setSchedulingPreference(SchedulingForRegPressure); setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10); @@ -1562,6 +1565,102 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // ARM Optimization Hooks //===----------------------------------------------------------------------===// +static +SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, + TargetLowering::DAGCombinerInfo &DCI) { + + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT VT = N->getValueType(0); + unsigned Opc = N->getOpcode(); + bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; + SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); + SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2); + ISD::CondCode CC = ISD::SETCC_INVALID; + + if (isSlctCC) { + CC = cast(Slct.getOperand(4))->get(); + } else { + SDValue CCOp = Slct.getOperand(0); + if (CCOp.getOpcode() == ISD::SETCC) + CC = cast(CCOp.getOperand(2))->get(); + } + + bool DoXform = false; + bool InvCC = false; + assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && + "Bad input!"); + + if (LHS.getOpcode() == ISD::Constant && + cast(LHS)->isNullValue()) { + DoXform = true; + } else if (CC != ISD::SETCC_INVALID && + RHS.getOpcode() == ISD::Constant && + cast(RHS)->isNullValue()) { + std::swap(LHS, RHS); + SDValue Op0 = Slct.getOperand(0); + MVT OpVT = isSlctCC ? Op0.getValueType() : + Op0.getOperand(0).getValueType(); + bool isInt = OpVT.isInteger(); + CC = ISD::getSetCCInverse(CC, isInt); + + if (!TLI.isCondCodeLegal(CC, OpVT)) + return SDValue(); // Inverse operator isn't legal. + + DoXform = true; + InvCC = true; + } + + if (DoXform) { + SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); + if (isSlctCC) + return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, + Slct.getOperand(0), Slct.getOperand(1), CC); + SDValue CCOp = Slct.getOperand(0); + if (InvCC) + CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), + CCOp.getOperand(0), CCOp.getOperand(1), CC); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, + CCOp, OtherOp, Result); + } + return SDValue(); +} + +/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. +static SDValue PerformADDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // added by evan in r37685 with no testcase. + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + + // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) + if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N0, N1, DCI); + if (Result.getNode()) return Result; + } + if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N1, N0, DCI); + if (Result.getNode()) return Result; + } + + return SDValue(); +} + +/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. +static SDValue PerformSUBCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // added by evan in r37685 with no testcase. + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + + // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) + if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N1, N0, DCI); + if (Result.getNode()) return Result; + } + + return SDValue(); +} + + /// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD. static SDValue PerformFMRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { @@ -1576,6 +1675,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; + case ISD::ADD: return PerformADDCombine(N, DCI); + case ISD::SUB: return PerformSUBCombine(N, DCI); case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI); } diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 5d0446f0b2b..a2cf079e0b1 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1842,28 +1842,6 @@ signed or unsigned overflow, respectively. //===---------------------------------------------------------------------===// -test/CodeGen/X86/2009-03-07-FPConstSelect.ll compiles to: - -_f: - xorl %eax, %eax - cmpl $0, 4(%esp) - movl $4, %ecx - cmovne %eax, %ecx - flds LCPI1_0(%ecx) - ret - -we should recognize cmov of 0 and a power of two and compile it into a -setcc+shift. This would give us something like: - -_f: - xorl %eax,%eax - cmpl $0, 4(%esp) - seteq %al - flds LCPI1_0(%ecx, %eax,4) - ret - -//===---------------------------------------------------------------------===// - memcpy/memmove do not lower to SSE copies when possible. A silly example is: define <16 x float> @foo(<16 x float> %A) nounwind { %tmp = alloca <16 x float>, align 16 diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f15acd73078..dfa2a090985 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5710,7 +5710,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { !isScalarFPTypeInSSEReg(VT)) // FPStack? IllegalFPCMov = !hasFPCMov(cast(CC)->getSExtValue()); - if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) || Opc == X86ISD::BT) { // FIXME + if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) || + Opc == X86ISD::BT) { // FIXME Cond = Cmp; addTest = false; } @@ -8185,9 +8186,89 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS); } + // If this is a select between two integer constants, try to do some + // optimizations. + if (ConstantSDNode *LHSC = dyn_cast(LHS)) { + if (ConstantSDNode *RHSC = dyn_cast(RHS)) + // Don't do this for crazy integer types. + if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) { + // If this is efficiently invertible, canonicalize the LHSC/RHSC values + // so that LHSC (the true value) is larger than RHSC (the false value). + bool NeedsCondInvert = false; + + if (LHSC->getAPIntValue().ult(RHSC->getAPIntValue()) && + // Efficiently invertible. + (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible. + (Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible. + isa(Cond.getOperand(1))))) { + NeedsCondInvert = true; + std::swap(LHSC, RHSC); + } + + // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0. + if (RHSC->getAPIntValue() == 0 && LHSC->getAPIntValue().isPowerOf2()) { + if (NeedsCondInvert) // Invert the condition if needed. + Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, + DAG.getConstant(1, Cond.getValueType())); + + // Zero extend the condition if needed. + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond); + + unsigned ShAmt = LHSC->getAPIntValue().logBase2(); + return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond, + DAG.getConstant(ShAmt, MVT::i8)); + } + } + } + + return SDValue(); +} + +/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL] +static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + DebugLoc DL = N->getDebugLoc(); + + // If the flag operand isn't dead, don't touch this CMOV. + if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty()) + return SDValue(); + + // If this is a select between two integer constants, try to do some + // optimizations. Note that the operands are ordered the opposite of SELECT + // operands. + if (ConstantSDNode *TrueC = dyn_cast(N->getOperand(1))) { + if (ConstantSDNode *FalseC = dyn_cast(N->getOperand(0))) { + // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is + // larger than FalseC (the false value). + X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2); + + if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) { + CC = X86::GetOppositeBranchCondition(CC); + std::swap(TrueC, FalseC); + } + + // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0. + if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) { + SDValue Cond = N->getOperand(3); + Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, + DAG.getConstant(CC, MVT::i8), Cond); + + // Zero extend the condition if needed. + Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond); + + unsigned ShAmt = TrueC->getAPIntValue().logBase2(); + Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond, + DAG.getConstant(ShAmt, MVT::i8)); + if (N->getNumValues() == 2) // Dead flag value? + return DCI.CombineTo(N, Cond, SDValue()); + return Cond; + } + } + } return SDValue(); } + /// PerformShiftCombine - Transforms vector shift nodes to use vector shifts /// when possible. static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, @@ -8448,6 +8529,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::BUILD_VECTOR: return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this); case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); + case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); case ISD::SHL: case ISD::SRA: case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); diff --git a/test/CodeGen/X86/const-select.ll b/test/CodeGen/X86/const-select.ll new file mode 100644 index 00000000000..6e3156beb0f --- /dev/null +++ b/test/CodeGen/X86/const-select.ll @@ -0,0 +1,22 @@ + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + +; RUN: llvm-as < %s | llc | grep {LCPI1_0(,%eax,4)} +define float @f(i32 %x) nounwind readnone { +entry: + %0 = icmp eq i32 %x, 0 ; [#uses=1] + %iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01 ; [#uses=1] + ret float %iftmp.0.0 +} + +; RUN: llvm-as < %s | llc | grep {movsbl.*(%e.x,%e.x,4), %eax} +define signext i8 @test(i8* nocapture %P, double %F) nounwind readonly { +entry: + %0 = fcmp olt double %F, 4.200000e+01 ; [#uses=1] + %iftmp.0.0 = select i1 %0, i32 4, i32 0 ; [#uses=1] + %1 = getelementptr i8* %P, i32 %iftmp.0.0 ; [#uses=1] + %2 = load i8* %1, align 1 ; [#uses=1] + ret i8 %2 +} + diff --git a/test/CodeGen/X86/mul-legalize.ll b/test/CodeGen/X86/mul-legalize.ll index 597806f2f9c..487614f74dd 100644 --- a/test/CodeGen/X86/mul-legalize.ll +++ b/test/CodeGen/X86/mul-legalize.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=x86 | grep cmov | count 1 +; RUN: llvm-as < %s | llc -march=x86 | grep 24576 ; PR2135 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" @@ -19,9 +19,6 @@ return: ret void } -define i1 @report__equal(i32 %x, i32 %y) nounwind { - %tmp = icmp eq i32 %x, %y - ret i1 %tmp -} +declare i1 @report__equal(i32 %x, i32 %y) nounwind; declare void @abort()