From: James Molloy Date: Wed, 4 Nov 2015 16:55:07 +0000 (+0000) Subject: [ARM] Combine CMOV into BFI where possible X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=447c9ea9e1aa98e11664645d0772071c912301f0;p=oota-llvm.git [ARM] Combine CMOV into BFI where possible If we have a CMOV, OR and AND combination such as: if (x & CN) y |= CM; And: * CN is a single bit; * All bits covered by CM are known zero in y; Then we can convert this to a sequence of BFI instructions. This will always be a win if CM is a single bit, will always be no worse than the TST & OR sequence if CM is two bits, and for thumb will be no worse if CM is three bits (due to the extra IT instruction). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252057 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 13f528517f1..221cb1ac361 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -10228,6 +10228,104 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero, + APInt &KnownOne) { + if (Op.getOpcode() == ARMISD::BFI) { + // Conservatively, we can recurse down the first operand + // and just mask out all affected bits. + computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne); + + // The operand to BFI is already a mask suitable for removing the bits it + // sets. + ConstantSDNode *CI = cast(Op.getOperand(2)); + APInt Mask = CI->getAPIntValue(); + KnownZero &= Mask; + KnownOne &= Mask; + return; + } + return DAG.computeKnownBits(Op, KnownZero, KnownOne); +} + +SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const { + // If we have a CMOV, OR and AND combination such as: + // if (x & CN) + // y |= CM; + // + // And: + // * CN is a single bit; + // * All bits covered by CM are known zero in y + // + // Then we can convert this into a sequence of BFI instructions. This will + // always be a win if CM is a single bit, will always be no worse than the + // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is + // three bits (due to the extra IT instruction). + + SDValue Op0 = CMOV->getOperand(0); + SDValue Op1 = CMOV->getOperand(1); + SDValue CmpZ = CMOV->getOperand(4); + + assert(CmpZ->getOpcode() == ARMISD::CMPZ); + SDValue And = CmpZ->getOperand(0); + if (And->getOpcode() != ISD::AND) + return SDValue(); + ConstantSDNode *AndC = dyn_cast(And->getOperand(1)); + if (!AndC || !AndC->getAPIntValue().isPowerOf2()) + return SDValue(); + SDValue X = And->getOperand(0); + + // Canonicalize so that the OR is on the left. + if (Op1->getOpcode() == ISD::OR) + std::swap(Op0, Op1); + if (Op0->getOpcode() != ISD::OR) + return SDValue(); + + ConstantSDNode *OrC = dyn_cast(Op0->getOperand(1)); + if (!OrC) + return SDValue(); + SDValue Y = Op0->getOperand(0); + + if (Op1 != Y) + return SDValue(); + + // Now, is it profitable to continue? + APInt OrCI = OrC->getAPIntValue(); + unsigned Heuristic = Subtarget->isThumb() ? 3 : 2; + if (OrCI.countPopulation() > Heuristic) + return SDValue(); + + // Lastly, can we determine that the bits defined by OrCI + // are zero in Y? + APInt KnownZero, KnownOne; + computeKnownBits(DAG, Y, KnownZero, KnownOne); + if ((OrCI & KnownZero) != OrCI) + return SDValue(); + + // OK, we can do the combine. + SDValue V = Y; + SDLoc dl(X); + EVT VT = X.getValueType(); + unsigned BitInX = AndC->getAPIntValue().logBase2(); + + if (BitInX != 0) { + // We must shift X first. + X = DAG.getNode(ISD::SRL, dl, VT, X, + DAG.getConstant(BitInX, dl, VT)); + } + + for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits(); + BitInY < NumActiveBits; ++BitInY) { + if (OrCI[BitInY] == 0) + continue; + APInt Mask(VT.getSizeInBits(), 0); + Mask.setBit(BitInY); + V = DAG.getNode(ARMISD::BFI, dl, VT, V, X, + // Confusingly, the operand is an *inverted* mask. + DAG.getConstant(~Mask, dl, VT)); + } + + return V; +} + /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. SDValue ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { @@ -10246,6 +10344,13 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { ARMCC::CondCodes CC = (ARMCC::CondCodes)cast(ARMcc)->getZExtValue(); + // BFI is only available on V6T2+. + if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) { + SDValue R = PerformCMOVToBFICombine(N, DAG); + if (R) + return R; + } + // Simplify // mov r1, r0 // cmp r1, x diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 99a18f0d332..852a36b0c12 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -260,6 +260,7 @@ namespace llvm { SDNode *Node) const override; SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const; + SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override; diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll index 0661960d1ae..46653c3e7be 100644 --- a/test/CodeGen/ARM/bfi.ll +++ b/test/CodeGen/ARM/bfi.ll @@ -74,3 +74,26 @@ entry: %or = or i32 %shl, %and ret i32 %or } + +define i32 @f7(i32 %x, i32 %y) { +; CHECK-LABEL: f7: +; CHECK: bfi r1, r0, #4, #1 + %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00 + %and = and i32 %x, 4 + %or = or i32 %y2, 16 + %cmp = icmp ne i32 %and, 0 + %sel = select i1 %cmp, i32 %or, i32 %y2 + ret i32 %sel +} + +define i32 @f8(i32 %x, i32 %y) { +; CHECK-LABEL: f8: +; CHECK: bfi r1, r0, #4, #1 +; CHECK: bfi r1, r0, #5, #1 + %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00 + %and = and i32 %x, 4 + %or = or i32 %y2, 48 + %cmp = icmp ne i32 %and, 0 + %sel = select i1 %cmp, i32 %or, i32 %y2 + ret i32 %sel +}