From c19d1c3ba2b216f0f91d71cf6fc2e983fc995854 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 19 Dec 2010 22:08:31 +0000 Subject: [PATCH] improve the setcc -> setcc_carry optimization to happen more consistently by moving it out of lowering into dag combine. Add some missing patterns for matching away extended versions of setcc_c. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122201 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 33 +++++++++++++++++--------- lib/Target/X86/X86ISelLowering.h | 2 +- lib/Target/X86/X86InstrCompiler.td | 11 +++++++++ test/CodeGen/X86/add-of-carry.ll | 22 ++++++++++++++++- test/CodeGen/X86/avx-intrinsics-x86.ll | 31 +++++++++--------------- test/CodeGen/X86/sse41.ll | 4 ++-- 6 files changed, 68 insertions(+), 35 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8311d337367..48e418d57ce 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7053,17 +7053,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (X86CC == X86::COND_INVALID) return SDValue(); - SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG); - - // Use sbb x, x to materialize carry bit into a GPR. - if (X86CC == X86::COND_B) - return DAG.getNode(ISD::AND, dl, MVT::i8, - DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), Cond), - DAG.getConstant(1, MVT::i8)); - + SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG); return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), Cond); + DAG.getConstant(X86CC, MVT::i8), EFLAGS); } SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { @@ -11430,13 +11422,31 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT +static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { + unsigned X86CC = N->getConstantOperandVal(0); + SDValue EFLAG = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + // Materialize "setb reg" as "sbb reg,reg", since it can be extended without + // a zext and produces an all-ones bit which is more useful than 0/1 in some + // cases. + if (X86CC == X86::COND_B) + return DAG.getNode(ISD::AND, DL, MVT::i8, + DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), EFLAG), + DAG.getConstant(1, MVT::i8)); + + return SDValue(); +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; switch (N->getOpcode()) { default: break; case ISD::EXTRACT_VECTOR_ELT: - return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this); + return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this); case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); case ISD::MUL: return PerformMulCombine(N, DAG, DCI); @@ -11452,6 +11462,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG); + case X86ISD::SETCC: return PerformSETCCCombine(N, DAG); case X86ISD::SHUFPS: // Handle all target specific shuffles case X86ISD::SHUFPD: case X86ISD::PALIGN: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 5fea01de09d..7566f989822 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -92,7 +92,7 @@ namespace llvm { // Same as SETCC except it's materialized with a sbb and the value is all // one's or all zero's. - SETCC_CARRY, + SETCC_CARRY, // R = carry_bit ? ~0 : 0 /// X86 conditional moves. Operand 0 and operand 1 are the two values /// to select from. Operand 2 is the condition code, and operand 3 is the diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 2b1ea9c6851..724e6b895e4 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -193,9 +193,20 @@ def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "", } // isCodeGenOnly +def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C16r)>; +def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C32r)>; def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C64r)>; +def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C16r)>; +def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C32r)>; +def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C64r)>; + //===----------------------------------------------------------------------===// // String Pseudo Instructions diff --git a/test/CodeGen/X86/add-of-carry.ll b/test/CodeGen/X86/add-of-carry.ll index 4c2257494d2..f924ec8132e 100644 --- a/test/CodeGen/X86/add-of-carry.ll +++ b/test/CodeGen/X86/add-of-carry.ll @@ -1,8 +1,9 @@ ; RUN: llc < %s -march=x86 | FileCheck %s ; -define i32 @add32carry(i32 %sum, i32 %x) nounwind readnone ssp { +define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp { entry: +; CHECK: test1: ; CHECK: sbbl %ecx, %ecx ; CHECK-NOT: addl ; CHECK: subl %ecx, %eax @@ -12,3 +13,22 @@ entry: %z.0 = add i32 %add4, %inc ret i32 %z.0 } + +; Instcombine transforms test1 into test2: +; CHECK: test2: +; CHECK: movl +; CHECK-NEXT: addl +; CHECK-NEXT: sbbl +; CHECK-NEXT: subl +; CHECK-NEXT: ret +define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp { +entry: + %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum) + %0 = extractvalue { i32, i1 } %uadd, 0 + %cmp = extractvalue { i32, i1 } %uadd, 1 + %inc = zext i1 %cmp to i32 + %z.0 = add i32 %0, %inc + ret i32 %z.0 +} + +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll index ec5ed17ad84..6c32396a417 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -114,8 +114,8 @@ declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) { ; CHECK: vcomisd - ; CHECK: setb - ; CHECK: movzbl + ; CHECK: sbbl %eax, %eax + ; CHECK: andl $1, %eax %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } @@ -825,8 +825,7 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) { ; CHECK: vucomisd - ; CHECK: setb - ; CHECK: movzbl + ; CHECK: sbbl %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } @@ -1183,8 +1182,7 @@ declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) { ; CHECK: vptest - ; CHECK: setb - ; CHECK: movzbl + ; CHECK: sbbl %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -1455,8 +1453,7 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { ; CHECK: vcomiss - ; CHECK: setb - ; CHECK: movzbl + ; CHECK: sbb %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -1697,8 +1694,7 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) { ; CHECK: vucomiss - ; CHECK: setb - ; CHECK: movzbl + ; CHECK: sbbl %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -2173,8 +2169,7 @@ declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK: vptest - ; CHECK: setb - ; CHECK: movzbl + ; CHECK: sbbl %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; [#uses=1] ret i32 %res } @@ -2451,8 +2446,7 @@ declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) noun define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) { ; CHECK: vtestpd - ; CHECK: setb - ; CHECK: movzbl + ; CHECK: sbbl %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } @@ -2461,8 +2455,7 @@ declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnon define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) { ; CHECK: vtestpd - ; CHECK: setb - ; CHECK: movzbl + ; CHECK: sbbl %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; [#uses=1] ret i32 %res } @@ -2471,8 +2464,7 @@ declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind rea define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) { ; CHECK: vtestps - ; CHECK: setb - ; CHECK: movzbl + ; CHECK: sbbl %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -2481,8 +2473,7 @@ declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) { ; CHECK: vtestps - ; CHECK: setb - ; CHECK: movzbl + ; CHECK: sbbl %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; [#uses=1] ret i32 %res } diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll index 3a14fa26300..2ac4cb435a7 100644 --- a/test/CodeGen/X86/sse41.ll +++ b/test/CodeGen/X86/sse41.ll @@ -200,11 +200,11 @@ define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind { ret i32 %tmp1 ; X32: _ptestz_2: ; X32: ptest %xmm1, %xmm0 -; X32: setb %al +; X32: sbbl %eax ; X64: _ptestz_2: ; X64: ptest %xmm1, %xmm0 -; X64: setb %al +; X64: sbbl %eax } define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind { -- 2.34.1