From c004eec71b49ae13ee4d9f859c61cdb9ed092b22 Mon Sep 17 00:00:00 2001 From: Owen Anderson <resistor@mac.com> Date: Tue, 21 Sep 2010 18:41:19 +0000 Subject: [PATCH] When adding the carry bit to another value on X86, exploit the fact that the carry-materialization (sbbl x, x) sets the registers to 0 or ~0. Combined with two's complement arithmetic, we can fold the intermediate AND and the ADD into a single SUB. This fixes <rdar://problem/8449754>. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@114460 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 23 +++++++++++++++++++++++ test/CodeGen/X86/add-of-carry.ll | 14 ++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 test/CodeGen/X86/add-of-carry.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index aaf91a83657..8d52f8c954d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1021,6 +1021,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::ADD); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); @@ -10452,6 +10453,27 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// PerformAddCombine - Optimize ADD when combined with X86 opcodes. 
+static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) + return SDValue(); + + EVT VT = N->getValueType(0); + SDValue Op1 = N->getOperand(1); + if (Op1->getOpcode() == ISD::AND) { + SDValue AndOp0 = Op1->getOperand(0); + ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(Op1->getOperand(1)); + // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) + if (AndOp0->getOpcode() == X86ISD::SETCC_CARRY && + AndOp1 && AndOp1->getZExtValue() == 1) { + DebugLoc DL = N->getDebugLoc(); + return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); + } + } + + return SDValue(); +} /// PerformMulCombine - Optimize a single multiply with constant into two /// in order to implement it with two cheaper instructions, e.g. @@ -10938,6 +10960,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this); case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); + case ISD::ADD: return PerformAddCombine(N, DAG, DCI); case ISD::MUL: return PerformMulCombine(N, DAG, DCI); case ISD::SHL: case ISD::SRA: diff --git a/test/CodeGen/X86/add-of-carry.ll b/test/CodeGen/X86/add-of-carry.ll new file mode 100644 index 00000000000..4c2257494d2 --- /dev/null +++ b/test/CodeGen/X86/add-of-carry.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 | FileCheck %s +; <rdar://problem/8449754> + +define i32 @add32carry(i32 %sum, i32 %x) nounwind readnone ssp { +entry: +; CHECK: sbbl %ecx, %ecx +; CHECK-NOT: addl +; CHECK: subl %ecx, %eax + %add4 = add i32 %x, %sum + %cmp = icmp ult i32 %add4, %x + %inc = zext i1 %cmp to i32 + %z.0 = add i32 %add4, %inc + ret i32 %z.0 +} -- 2.34.1