From e721f5c8d3ea2cc2cc8c3c308ce8bdd8a3fc3b32 Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Wed, 13 Jul 2011 00:42:17 +0000
Subject: [PATCH] Improve codegen for selects:
 if (x != 0) x = 1
 if (x == 1) x = 1

Previous codegen looks like this:
        mov     r1, r0
        cmp     r1, #1
        mov     r0, #0
        moveq   r0, #1

The naive lowering selects between two different values. It should recognize
that the test is an equality test, so this is really a conditional move rather
than a select:
        cmp     r0, #1
        movne   r0, #0

rdar://9758317

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@135017 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelLowering.cpp | 67 +++++++++++++++++++++++++++++-
 lib/Target/ARM/ARMISelLowering.h   |  1 +
 test/CodeGen/ARM/select-imm.ll     | 36 ++++++++++++++++
 test/CodeGen/Thumb2/thumb2-teq.ll  | 38 -----------------
 test/CodeGen/Thumb2/thumb2-teq2.ll | 16 -------
 test/CodeGen/Thumb2/thumb2-tst.ll  | 45 --------------------
 test/CodeGen/Thumb2/thumb2-tst2.ll | 16 -------
 7 files changed, 103 insertions(+), 116 deletions(-)

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 4340a714778..cf8c5baa8e7 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2754,7 +2754,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
     SDValue ARMcc;
     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
-    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp);
   }
 
   ARMCC::CondCodes CondCode, CondCode2;
@@ -6960,6 +6960,70 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
 }
 
+/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
+SDValue
+ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
+  SDValue Cmp = N->getOperand(4);
+  if (Cmp.getOpcode() != ARMISD::CMPZ)
+    // Only looking at EQ and NE cases.
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+  SDValue LHS = Cmp.getOperand(0);
+  SDValue RHS = Cmp.getOperand(1);
+  SDValue FalseVal = N->getOperand(0);
+  SDValue TrueVal = N->getOperand(1);
+  SDValue ARMcc = N->getOperand(2);
+  ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+
+  // Simplify
+  //   mov     r1, r0
+  //   cmp     r1, x
+  //   mov     r0, y
+  //   moveq   r0, x
+  // to
+  //   cmp     r0, x
+  //   movne   r0, y
+  //
+  //   mov     r1, r0
+  //   cmp     r1, x
+  //   mov     r0, x
+  //   movne   r0, y
+  // to
+  //   cmp     r0, x
+  //   movne   r0, y
+  /// FIXME: Turn this into a target neutral optimization?
+  SDValue Res;
+  if (CC == ARMCC::NE && FalseVal == RHS) {
+    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
+                      N->getOperand(3), Cmp);
+  } else if (CC == ARMCC::EQ && TrueVal == RHS) {
+    SDValue ARMcc;
+    SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
+    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
+                      N->getOperand(3), NewCmp);
+  }
+
+  if (Res.getNode()) {
+    APInt KnownZero, KnownOne;
+    APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+    DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne);
+    // Capture demanded bits information that would be otherwise lost.
+    if (KnownZero == 0xfffffffe)
+      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+                        DAG.getValueType(MVT::i1));
+    else if (KnownZero == 0xffffff00)
+      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+                        DAG.getValueType(MVT::i8));
+    else if (KnownZero == 0xffff0000)
+      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+                        DAG.getValueType(MVT::i16));
+  }
+
+  return Res;
+}
+
 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   switch (N->getOpcode()) {
@@ -6988,6 +7052,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::ZERO_EXTEND:
   case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
   case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
+  case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
   case ARMISD::VLD2DUP:
   case ARMISD::VLD3DUP:
   case ARMISD::VLD4DUP:
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index dd9df0ead10..980fb404887 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -244,6 +244,7 @@ namespace llvm {
       EmitInstrWithCustomInserter(MachineInstr *MI,
                                   MachineBasicBlock *MBB) const;
 
+    SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
     virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
     bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll
index 158c015f467..f43dde52bbf 100644
--- a/test/CodeGen/ARM/select-imm.ll
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -76,3 +76,39 @@ entry:
   %1 = select i1 %0, i32 4283826005, i32 %x
   ret i32 %1
 }
+
+; rdar://9758317
+define i32 @t5(i32 %a) nounwind {
+entry:
+; ARM: t5:
+; ARM-NOT: mov
+; ARM: cmp r0, #1
+; ARM-NOT: mov
+; ARM: movne r0, #0
+
+; THUMB2: t5:
+; THUMB2-NOT: mov
+; THUMB2: cmp r0, #1
+; THUMB2: it ne
+; THUMB2: movne r0, #0
+  %cmp = icmp eq i32 %a, 1
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @t6(i32 %a) nounwind {
+entry:
+; ARM: t6:
+; ARM-NOT: mov
+; ARM: cmp r0, #0
+; ARM: movne r0, #1
+
+; THUMB2: t6:
+; THUMB2-NOT: mov
+; THUMB2: cmp r0, #0
+; THUMB2: it ne
+; THUMB2: movne r0, #1
+  %tobool = icmp ne i32 %a, 0
+  %lnot.ext = zext i1 %tobool to i32
+  ret i32 %lnot.ext
+}
diff --git a/test/CodeGen/Thumb2/thumb2-teq.ll b/test/CodeGen/Thumb2/thumb2-teq.ll
index 566408a6048..00c928fc078 100644
--- a/test/CodeGen/Thumb2/thumb2-teq.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq.ll
@@ -3,15 +3,6 @@
 ; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
 ; test as 'mov.w r0, #0'. So far, that requires physreg joining.
 
-; 0x000000bb = 187
-define i1 @f1(i32 %a) {
-    %tmp = xor i32 %a, 187
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-; CHECK: f1:
-; CHECK: teq.w r0, #187
-
 ; 0x000000bb = 187
 define i1 @f2(i32 %a) {
     %tmp = xor i32 %a, 187
@@ -30,24 +21,6 @@ define i1 @f3(i32 %a) {
 ; CHECK: f3:
 ; CHECK: teq.w r0, #11141290
 
-; 0x00aa00aa = 11141290
-define i1 @f4(i32 %a) {
-    %tmp = xor i32 %a, 11141290
-    %tmp1 = icmp ne i32 0, %tmp
-    ret i1 %tmp1
-}
-; CHECK: f4:
-; CHECK: teq.w r0, #11141290
-
-; 0xcc00cc00 = 3422604288
-define i1 @f5(i32 %a) {
-    %tmp = xor i32 %a, 3422604288
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-; CHECK: f5:
-; CHECK: teq.w r0, #-872363008
-
 ; 0xcc00cc00 = 3422604288
 define i1 @f6(i32 %a) {
     %tmp = xor i32 %a, 3422604288
@@ -72,17 +45,6 @@ define i1 @f8(i32 %a) {
     %tmp1 = icmp ne i32 0, %tmp
     ret i1 %tmp1
 }
-; CHECK: f8:
-; CHECK: teq.w r0, #-572662307
-
-; 0x00110000 = 1114112
-define i1 @f9(i32 %a) {
-    %tmp = xor i32 %a, 1114112
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-; CHECK: f9:
-; CHECK: teq.w r0, #1114112
 
 ; 0x00110000 = 1114112
 define i1 @f10(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-teq2.ll b/test/CodeGen/Thumb2/thumb2-teq2.ll
index cdd3489c50d..8acae9090f1 100644
--- a/test/CodeGen/Thumb2/thumb2-teq2.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq2.ll
@@ -3,14 +3,6 @@
 ; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
 ; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
 
-define i1 @f1(i32 %a, i32 %b) {
-; CHECK: f1
-; CHECK: teq.w r0, r1
-    %tmp = xor i32 %a, %b
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-
 define i1 @f2(i32 %a, i32 %b) {
 ; CHECK: f2
 ; CHECK: teq.w r0, r1
     %tmp = xor i32 %a, %b
     %tmp1 = icmp ne i32 %tmp, 0
     ret i1 %tmp1
 }
 
-define i1 @f3(i32 %a, i32 %b) {
-; CHECK: f3
-; CHECK: teq.w r0, r1
-    %tmp = xor i32 %a, %b
-    %tmp1 = icmp ne i32 0, %tmp
-    ret i1 %tmp1
-}
-
 define i1 @f4(i32 %a, i32 %b) {
 ; CHECK: f4
 ; CHECK: teq.w r0, r1
diff --git a/test/CodeGen/Thumb2/thumb2-tst.ll b/test/CodeGen/Thumb2/thumb2-tst.ll
index 47f553f2048..43e208cc59d 100644
--- a/test/CodeGen/Thumb2/thumb2-tst.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst.ll
@@ -3,15 +3,6 @@
 ; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
 ; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
 
-; 0x000000bb = 187
-define i1 @f1(i32 %a) {
-    %tmp = and i32 %a, 187
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-; CHECK: f1:
-; CHECK: tst.w r0, #187
-
 ; 0x000000bb = 187
 define i1 @f2(i32 %a) {
     %tmp = and i32 %a, 187
@@ -30,24 +21,6 @@ define i1 @f3(i32 %a) {
 ; CHECK: f3:
 ; CHECK: tst.w r0, #11141290
 
-; 0x00aa00aa = 11141290
-define i1 @f4(i32 %a) {
-    %tmp = and i32 %a, 11141290
-    %tmp1 = icmp ne i32 0, %tmp
-    ret i1 %tmp1
-}
-; CHECK: f4:
-; CHECK: tst.w r0, #11141290
-
-; 0xcc00cc00 = 3422604288
-define i1 @f5(i32 %a) {
-    %tmp = and i32 %a, 3422604288
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-; CHECK: f5:
-; CHECK: tst.w r0, #-872363008
-
 ; 0xcc00cc00 = 3422604288
 define i1 @f6(i32 %a) {
     %tmp = and i32 %a, 3422604288
@@ -66,24 +39,6 @@ define i1 @f7(i32 %a) {
 ; CHECK: f7:
 ; CHECK: tst.w r0, #-572662307
 
-; 0xdddddddd = 3722304989
-define i1 @f8(i32 %a) {
-    %tmp = and i32 %a, 3722304989
-    %tmp1 = icmp ne i32 0, %tmp
-    ret i1 %tmp1
-}
-; CHECK: f8:
-; CHECK: tst.w r0, #-572662307
-
-; 0x00110000 = 1114112
-define i1 @f9(i32 %a) {
-    %tmp = and i32 %a, 1114112
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-; CHECK: f9:
-; CHECK: tst.w r0, #1114112
-
 ; 0x00110000 = 1114112
 define i1 @f10(i32 %a) {
     %tmp = and i32 %a, 1114112
diff --git a/test/CodeGen/Thumb2/thumb2-tst2.ll b/test/CodeGen/Thumb2/thumb2-tst2.ll
index 405b3bb1d38..bfe016fc8d6 100644
--- a/test/CodeGen/Thumb2/thumb2-tst2.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst2.ll
@@ -3,14 +3,6 @@
 ; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
 ; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
 
-define i1 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
-; CHECK: tst r0, r1
-    %tmp = and i32 %a, %b
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-
 define i1 @f2(i32 %a, i32 %b) {
 ; CHECK: f2:
 ; CHECK: tst r0, r1
     %tmp = and i32 %a, %b
     %tmp1 = icmp ne i32 %tmp, 0
     ret i1 %tmp1
 }
 
-define i1 @f3(i32 %a, i32 %b) {
-; CHECK: f3:
-; CHECK: tst r0, r1
-    %tmp = and i32 %a, %b
-    %tmp1 = icmp ne i32 0, %tmp
-    ret i1 %tmp1
-}
-
 define i1 @f4(i32 %a, i32 %b) {
 ; CHECK: f4:
 ; CHECK: tst r0, r1
-- 
2.34.1
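
For readers who want to see the transformation end to end, here is a small illustrative sketch; it is not part of the patch. The two C++ functions below are hand-written source-level equivalents of the new @t5/@t6 tests added to test/CodeGen/ARM/select-imm.ll (the function names is_one/is_nonzero are made up for illustration), and the ARM sequences in the comments are copied from the commit message and the FileCheck lines above.

    // Equivalent to @t5: zext(icmp eq i32 %a, 1).
    // Previous codegen (naive select):    Codegen with this patch:
    //   mov    r1, r0                       cmp    r0, #1
    //   cmp    r1, #1                       movne  r0, #0
    //   mov    r0, #0
    //   moveq  r0, #1
    int is_one(int a) { return a == 1; }

    // Equivalent to @t6: zext(icmp ne i32 %a, 0).
    // Codegen with this patch:
    //   cmp    r0, #0
    //   movne  r0, #1
    int is_nonzero(int a) { return a != 0; }

The AssertZext nodes emitted at the end of PerformCMOVCombine preserve the known-zero-bits information of the original node (the "Capture demanded bits information" comment above), e.g. marking the rewritten CMOV as effectively i1 for @t5/@t6-style code so that later combines can still drop redundant zero-extensions of the result.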