From c892aeb26601cc5109490d30c7e170cb07f84428 Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Thu, 23 Feb 2012 01:19:06 +0000
Subject: [PATCH] Optimize a couple of common patterns involving conditional
 moves where the false value is zero. Instead of a cmov + op, issue a
 conditional op instead. e.g.

    cmp   r9, r4
    mov   r4, #0
    moveq r4, #1
    orr   lr, lr, r4

should be:

    cmp   r9, r4
    orreq lr, lr, #1

That is, optimize (or x, (cmov 0, y, cond)) to (or.cond x, y). Similarly,
extend this to xor as well as (and x, (cmov -1, y, cond)) => (and.cond x, y).

It's possible to extend this to ADD and SUB, but I don't think they are common.

rdar://8659097

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151224 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelDAGToDAG.cpp | 118 ++++++++++++++++++++++++++++-
 lib/Target/ARM/ARMISelLowering.cpp |  93 +++++++++++++++++++++--
 lib/Target/ARM/ARMISelLowering.h   |   4 +
 lib/Target/ARM/ARMInstrInfo.td     |  67 ++++++++++++++++
 lib/Target/ARM/ARMInstrThumb2.td   |  38 ++++++++++
 test/CodeGen/ARM/select_xform.ll   |  46 +++++++++++
 6 files changed, 358 insertions(+), 8 deletions(-)

diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index b25fcdea517..d4b58f2e9b2 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -244,6 +244,7 @@ private:
 
   /// SelectCMOVOp - Select CMOV instructions for ARM.
   SDNode *SelectCMOVOp(SDNode *N);
+  SDNode *SelectConditionalOp(SDNode *N);
   SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
                               ARMCC::CondCodes CCVal, SDValue CCR,
                               SDValue InFlag);
@@ -2302,9 +2303,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
   // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
   // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
   // Pattern complexity = 18  cost = 1  size = 0
-  SDValue CPTmp0;
-  SDValue CPTmp1;
-  SDValue CPTmp2;
   if (Subtarget->isThumb()) {
     SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
                                       CCVal, CCR, InFlag);
@@ -2377,6 +2375,116 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
   return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
 }
 
+SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  SDValue FalseVal = N->getOperand(0);
+  SDValue TrueVal = N->getOperand(1);
+  ARMCC::CondCodes CCVal =
+    (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+  SDValue CCR = N->getOperand(3);
+  assert(CCR.getOpcode() == ISD::Register);
+  SDValue InFlag = N->getOperand(4);
+  SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+  if (Subtarget->isThumb()) {
+    SDValue CPTmp0;
+    SDValue CPTmp1;
+    if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
+      unsigned Opc;
+      switch (N->getOpcode()) {
+      default: llvm_unreachable("Unexpected node");
+      case ARMISD::CAND: Opc = ARM::t2ANDCCrs; break;
+      case ARMISD::COR: Opc = ARM::t2ORRCCrs; break;
+      case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break;
+      }
+      SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag };
+      return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
+    }
+
+    ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+    if (T) {
+      unsigned TrueImm = T->getZExtValue();
+      if (is_t2_so_imm(TrueImm)) {
+        unsigned Opc;
+        switch (N->getOpcode()) {
+        default: llvm_unreachable("Unexpected node");
+        case ARMISD::CAND: Opc = ARM::t2ANDCCri; break;
+        case ARMISD::COR: Opc = ARM::t2ORRCCri; break;
+        case ARMISD::CXOR: Opc = ARM::t2EORCCri; break;
+        }
+        SDValue True =
+          CurDAG->getTargetConstant(TrueImm, MVT::i32);
+        SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
+        return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+      }
+    }
+
+    unsigned Opc;
+    switch (N->getOpcode()) {
+    default: llvm_unreachable("Unexpected node");
+    case ARMISD::CAND: Opc = ARM::t2ANDCCrr; break;
+    case ARMISD::COR: Opc = ARM::t2ORRCCrr; break;
+    case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break;
+    }
+    SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
+    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+  }
+
+  SDValue CPTmp0;
+  SDValue CPTmp1;
+  SDValue CPTmp2;
+  if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
+    unsigned Opc;
+    switch (N->getOpcode()) {
+    default: llvm_unreachable("Unexpected node");
+    case ARMISD::CAND: Opc = ARM::ANDCCrsi; break;
+    case ARMISD::COR: Opc = ARM::ORRCCrsi; break;
+    case ARMISD::CXOR: Opc = ARM::EORCCrsi; break;
+    }
+    SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag };
+    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
+  }
+
+  if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+    unsigned Opc;
+    switch (N->getOpcode()) {
+    default: llvm_unreachable("Unexpected node");
+    case ARMISD::CAND: Opc = ARM::ANDCCrsr; break;
+    case ARMISD::COR: Opc = ARM::ORRCCrsr; break;
+    case ARMISD::CXOR: Opc = ARM::EORCCrsr; break;
+    }
+    SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag };
+    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8);
+  }
+
+  ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+  if (T) {
+    unsigned TrueImm = T->getZExtValue();
+    if (is_so_imm(TrueImm)) {
+      unsigned Opc;
+      switch (N->getOpcode()) {
+      default: llvm_unreachable("Unexpected node");
+      case ARMISD::CAND: Opc = ARM::ANDCCri; break;
+      case ARMISD::COR: Opc = ARM::ORRCCri; break;
+      case ARMISD::CXOR: Opc = ARM::EORCCri; break;
+      }
+      SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+      SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
+      return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+    }
+  }
+
+  unsigned Opc;
+  switch (N->getOpcode()) {
+  default: llvm_unreachable("Unexpected node");
+  case ARMISD::CAND: Opc = ARM::ANDCCrr; break;
+  case ARMISD::COR: Opc = ARM::ORRCCrr; break;
+  case ARMISD::CXOR: Opc = ARM::EORCCrr; break;
+  }
+  SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
+  return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+}
+
 /// Target-specific DAG combining for ISD::XOR.
 /// Target-independent combining lowers SELECT_CC nodes of the form
 /// select_cc setg[ge] X, 0, X, -X
@@ -2714,6 +2822,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   }
   case ARMISD::CMOV:
     return SelectCMOVOp(N);
+  case ARMISD::CAND:
+  case ARMISD::COR:
+  case ARMISD::CXOR:
+    return SelectConditionalOp(N);
   case ARMISD::VZIP: {
     unsigned Opc = 0;
     EVT VT = N->getValueType(0);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index dd0d2f8d07b..b0960c5baa3 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -794,10 +794,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setTargetDAGCombine(ISD::SUB);
   setTargetDAGCombine(ISD::MUL);
 
-  if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
-    setTargetDAGCombine(ISD::OR);
-  if (Subtarget->hasNEON())
+  if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) {
     setTargetDAGCombine(ISD::AND);
+    setTargetDAGCombine(ISD::OR);
+    setTargetDAGCombine(ISD::XOR);
+  }
 
   setStackPointerRegisterToSaveRestore(ARM::SP);
 
@@ -890,7 +891,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
   case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
   case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
+  case ARMISD::CMOV: return "ARMISD::CMOV";
+  case ARMISD::CAND: return "ARMISD::CAND";
+  case ARMISD::COR: return "ARMISD::COR";
+  case ARMISD::CXOR: return "ARMISD::CXOR";
 
   case ARMISD::RBIT: return "ARMISD::RBIT";
 
@@ -6843,8 +6848,52 @@ static SDValue PerformMULCombine(SDNode *N,
   return SDValue();
 }
 
+static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) {
+  if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse())
+    return false;
+
+  SDValue FalseVal = N.getOperand(0);
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal);
+  if (!C)
+    return false;
+  if (AllOnes)
+    return C->isAllOnesValue();
+  return C->isNullValue();
+}
+
+/// formConditionalOp - Combine an operation with a conditional move operand
+/// to form a conditional op. e.g.
+/// (or x, (cmov 0, y, cond)) => (or.cond x, y)
+/// (and x, (cmov -1, y, cond)) => (and.cond x, y)
+static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG,
+                                 bool Commutable) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  bool isAND = N->getOpcode() == ISD::AND;
+  bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND);
+  if (!isCand && Commutable) {
+    isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND);
+    if (isCand)
+      std::swap(N0, N1);
+  }
+  if (!isCand)
+    return SDValue();
+
+  unsigned Opc = 0;
+  switch (N->getOpcode()) {
+  default: llvm_unreachable("Unexpected node");
+  case ISD::AND: Opc = ARMISD::CAND; break;
+  case ISD::OR: Opc = ARMISD::COR; break;
+  case ISD::XOR: Opc = ARMISD::CXOR; break;
+  }
+  return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0,
+                     N1.getOperand(1), N1.getOperand(2), N1.getOperand(3),
+                     N1.getOperand(4));
+}
+
 static SDValue PerformANDCombine(SDNode *N,
-                                 TargetLowering::DAGCombinerInfo &DCI) {
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const ARMSubtarget *Subtarget) {
   // Attempt to use immediate-form VBIC
   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
@@ -6875,6 +6924,13 @@ static SDValue PerformANDCombine(SDNode *N,
     }
   }
 
+  if (!Subtarget->isThumb1Only()) {
+    // (and x, (cmov -1, y, cond)) => (and.cond x, y)
+    SDValue CAND = formConditionalOp(N, DAG, true);
+    if (CAND.getNode())
+      return CAND;
+  }
+
   return SDValue();
 }
 
@@ -6911,6 +6967,13 @@ static SDValue PerformORCombine(SDNode *N,
     }
   }
 
+  if (!Subtarget->isThumb1Only()) {
+    // (or x, (cmov 0, y, cond)) => (or.cond x, y)
+    SDValue COR = formConditionalOp(N, DAG, true);
+    if (COR.getNode())
+      return COR;
+  }
+
   SDValue N0 = N->getOperand(0);
   if (N0.getOpcode() != ISD::AND)
     return SDValue();
@@ -7059,6 +7122,25 @@ static SDValue PerformORCombine(SDNode *N,
   return SDValue();
 }
 
+static SDValue PerformXORCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const ARMSubtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  SelectionDAG &DAG = DCI.DAG;
+
+  if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+    return SDValue();
+
+  if (!Subtarget->isThumb1Only()) {
+    // (xor x, (cmov 0, y, cond)) => (xor.cond x, y)
+    SDValue CXOR = formConditionalOp(N, DAG, true);
+    if (CXOR.getNode())
+      return CXOR;
+  }
+
+  return SDValue();
+}
+
 /// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
 /// the bits being cleared by the AND are not demanded by the BFI.
 static SDValue PerformBFICombine(SDNode *N,
@@ -8110,7 +8192,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SUB: return PerformSUBCombine(N, DCI);
   case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
   case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
-  case ISD::AND: return PerformANDCombine(N, DCI);
+  case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
+  case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
   case ARMISD::BFI: return PerformBFICombine(N, DCI);
   case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
   case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 527156aac41..a72a476e962 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -56,7 +56,11 @@ namespace llvm {
       CMPFP, // ARM VFP compare instruction, sets FPSCR.
       CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
       FMSTAT, // ARM fmstat instruction.
+      CMOV, // ARM conditional move instructions.
+      CAND, // ARM conditional and instructions.
+ COR, // ARM conditional or instructions. + CXOR, // ARM conditional xor instructions. BCC_i64, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index ce3e7700dae..798cf52fd04 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4084,6 +4084,73 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), 4, IIC_iCMOVi, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; + +let isCodeGenOnly = 1 in { +// Conditional instructions +multiclass AsI1_bincc_irs opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { + def ri : AsI1, + RegConstraint<"$Rn = $Rd"> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-0} = imm; + } + def rr : AsI1, + RegConstraint<"$Rn = $Rd"> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rm; + } + + def rsi : AsI1, + RegConstraint<"$Rn = $Rd"> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + } + + def rsr : AsI1, + RegConstraint<"$Rn = $Rd"> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; + } +} // AsI1_bincc_irs + +defm ANDCC : AsI1_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm ORRCC : AsI1_bincc_irs<0b1100, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm EORCC : AsI1_bincc_irs<0b0001, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsr>; + +} // isCodeGenOnly } // neverHasSideEffects //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index c15cbbedbff..555d8763d44 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2943,6 +2943,44 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>, RegConstraint<"$false = $Rd">; + +multiclass T2I_bincc_irs opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { + // shifted imm + def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), + iii, opc, ".w\t$Rd, $Rn, $imm", []>, + RegConstraint<"$Rn = $Rd"> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = opcod; + let Inst{15} = 0; + } + // register + def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), + iir, opc, ".w\t$Rd, $Rn, $Rm", []>, + RegConstraint<"$Rn = $Rd"> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{14-12} = 0b000; // imm3 + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type + } + // shifted register + def rs : T2sTwoRegShiftedReg<(outs rGPR:$Rd), + (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), + iis, opc, ".w\t$Rd, $Rn, $ShiftedRm", []>, + RegConstraint<"$Rn = $Rd"> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + } +} // T2I_bincc_irs + +defm t2ANDCC : T2I_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi>; +defm t2ORRCC : T2I_bincc_irs<0b0010, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsi>; +defm t2EORCC : 
T2I_bincc_irs<0b0100, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsi>; + } // isCodeGenOnly = 1 } // neverHasSideEffects diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll index 8a3133adf3e..3a66ec59bf5 100644 --- a/test/CodeGen/ARM/select_xform.ll +++ b/test/CodeGen/ARM/select_xform.ll @@ -58,3 +58,49 @@ define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind { %s = or i32 %z, %y ret i32 %s } + +define i32 @t5(i32 %a, i32 %b, i32 %c) nounwind { +entry: +; ARM: t5: +; ARM-NOT: moveq +; ARM: orreq r2, r2, #1 + +; T2: t5: +; T2-NOT: moveq +; T2: orreq.w r2, r2, #1 + %tmp1 = icmp eq i32 %a, %b + %tmp2 = zext i1 %tmp1 to i32 + %tmp3 = or i32 %tmp2, %c + ret i32 %tmp3 +} + +define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; ARM: t6: +; ARM-NOT: movge +; ARM: eorlt r3, r3, r2 + +; T2: t6: +; T2-NOT: movge +; T2: eorlt.w r3, r3, r2 + %cond = icmp slt i32 %a, %b + %tmp1 = select i1 %cond, i32 %c, i32 0 + %tmp2 = xor i32 %tmp1, %d + ret i32 %tmp2 +} + +define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind { +entry: +; ARM: t7: +; ARM-NOT: lsleq +; ARM: andeq r2, r2, r2, lsl #1 + +; T2: t7: +; T2-NOT: lsleq.w +; T2: andeq.w r2, r2, r2, lsl #1 + %tmp1 = shl i32 %c, 1 + %cond = icmp eq i32 %a, %b + %tmp2 = select i1 %cond, i32 %tmp1, i32 -1 + %tmp3 = and i32 %c, %tmp2 + ret i32 %tmp3 +} + -- 2.34.1
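
As a worked illustration of the combine described in the commit message, the sketch below pairs an IR function with the code this patch is meant to produce. It mirrors the new t5 test rather than adding coverage; the function name and the exact registers are illustrative assumptions, not output captured from llc.

; (or x, (cmov 0, y, cond)) => (or.cond x, y), seen end to end.
; Before this patch the zext of the compare is materialized with a
; mov/moveq pair; with it, the OR is folded into an ARMISD::COR node
; and selected as a predicated ORR.
define i32 @or_of_compare(i32 %a, i32 %b, i32 %x) nounwind {
entry:
  %cmp = icmp eq i32 %a, %b      ; cmp   r0, r1
  %bit = zext i1 %cmp to i32     ; previously: mov r?, #0 / moveq r?, #1
  %res = or i32 %x, %bit         ; now folded: orreq r2, r2, #1
  ret i32 %res
}

The same identity argument explains why the AND form requires a false value of all ones rather than zero: -1 is the identity for and, just as 0 is for or and xor, so the untaken case leaves x unchanged.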