From b4c945716f232ee07ec6fd3e1146175801fa1278 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 21 Oct 2011 06:55:01 +0000 Subject: [PATCH] Remove the X86 BLSI, BLSMSK, and BLSR intrinsics and replace them with custom isel lowering code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142642 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IntrinsicsX86.td | 20 +++-------- lib/Target/X86/X86ISelLowering.cpp | 54 +++++++++++++++++++++++++++++- lib/Target/X86/X86ISelLowering.h | 4 +++ lib/Target/X86/X86InstrInfo.td | 24 +++++++------ test/CodeGen/X86/bmi.ll | 42 ++++++++++------------- 5 files changed, 93 insertions(+), 51 deletions(-) diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index db9c9ce0030..2bad822abee 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -1642,24 +1642,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_bmi_bzhi_64 : GCCBuiltin<"__builtin_ia32_bzhi_di">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; - def int_x86_bmi_blsi_32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; - def int_x86_bmi_blsi_64 : - Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>; - def int_x86_bmi_blsmsk_32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; - def int_x86_bmi_blsmsk_64 : - Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>; - def int_x86_bmi_blsr_32 : - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; - def int_x86_bmi_blsr_64 : - Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>; - def int_x86_bmi_pdep_32 : + def int_x86_bmi_pdep_32 : GCCBuiltin<"__builtin_ia32_pdep_si">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_bmi_pdep_64 : + def int_x86_bmi_pdep_64 : GCCBuiltin<"__builtin_ia32_pdep_di">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; - def 
int_x86_bmi_pext_32 : + def int_x86_bmi_pext_32 : GCCBuiltin<"__builtin_ia32_pext_si">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_bmi_pext_64 : + def int_x86_bmi_pext_64 : GCCBuiltin<"__builtin_ia32_pext_di">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f1b160cdfae..96f04e72981 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1155,6 +1155,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SINT_TO_FP); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); + if (Subtarget->hasBMI()) + setTargetDAGCombine(ISD::XOR); computeRegisterProperties(); @@ -13300,7 +13302,9 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); - // Create ANDN instructions + // Create ANDN, BLSI, and BLSR instructions + // BLSI is X & (-X) + // BLSR is X & (X-1) if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -13313,6 +13317,26 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (N1.getOpcode() == ISD::XOR && isAllOnes(N1.getOperand(1))) return DAG.getNode(X86ISD::ANDN, DL, VT, N1.getOperand(0), N0); + // Check LHS for neg + if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 && + isZero(N0.getOperand(0))) + return DAG.getNode(X86ISD::BLSI, DL, VT, N1); + + // Check RHS for neg + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 && + isZero(N1.getOperand(0))) + return DAG.getNode(X86ISD::BLSI, DL, VT, N0); + + // Check LHS for X-1 + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 && + isAllOnes(N0.getOperand(1))) + return DAG.getNode(X86ISD::BLSR, DL, VT, N1); + + // Check RHS for X-1 + if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 && + isAllOnes(N1.getOperand(1))) + return 
DAG.getNode(X86ISD::BLSR, DL, VT, N0); + return SDValue(); } @@ -13500,6 +13524,33 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + EVT VT = N->getValueType(0); + + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + // Create BLSMSK instructions by finding X ^ (X-1) + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 && + isAllOnes(N0.getOperand(1))) + return DAG.getNode(X86ISD::BLSMSK, DL, VT, N1); + + if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 && + isAllOnes(N1.getOperand(1))) + return DAG.getNode(X86ISD::BLSMSK, DL, VT, N0); + + return SDValue(); +} + /// PerformLOADCombine - Do target-specific dag combines on LOAD nodes. static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { @@ -14180,6 +14231,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget); case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget); + case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget); case ISD::LOAD: return PerformLOADCombine(N, DAG, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 342a5e61754..0903b9f7ab0 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -230,6 +230,10 @@ namespace llvm { ANDN, // ANDN - Bitwise AND NOT with FLAGS results. 
+ BLSI, // BLSI - Extract lowest set isolated bit + BLSMSK, // BLSMSK - Get mask up to lowest set bit + BLSR, // BLSR - Reset lowest set bit + UMUL, // LOW, HI, FLAGS = umul LHS, RHS // MUL_IMM - X86 specific multiply by immediate. diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 0994ab94767..aa35cf0f009 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -226,6 +226,10 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>; +def X86blsi_flag : SDNode<"X86ISD::BLSI", SDTUnaryArithWithFlags>; +def X86blsmsk_flag : SDNode<"X86ISD::BLSMSK", SDTUnaryArithWithFlags>; +def X86blsr_flag : SDNode<"X86ISD::BLSR", SDTUnaryArithWithFlags>; + def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void, @@ -1401,30 +1405,30 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in { } multiclass bmi_bls { + RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, + PatFrag ld_frag> { def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src), !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (Int RC:$src)), (implicit EFLAGS)]>, T8, VEX_4V; + [(set RC:$dst, EFLAGS, (OpNode RC:$src))]>, T8, VEX_4V; def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src), !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (Int (ld_frag addr:$src))), (implicit EFLAGS)]>, + [(set RC:$dst, EFLAGS, (OpNode (ld_frag addr:$src)))]>, T8, VEX_4V; } let Predicates = [HasBMI], Defs = [EFLAGS] in { defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, - int_x86_bmi_blsr_32, loadi32>; + X86blsr_flag, loadi32>; defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, - int_x86_bmi_blsr_64, loadi64>, VEX_W; + X86blsr_flag, loadi64>, VEX_W; defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, - int_x86_bmi_blsmsk_32, loadi32>; + X86blsmsk_flag, 
loadi32>; defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, - int_x86_bmi_blsmsk_64, loadi64>, VEX_W; + X86blsmsk_flag, loadi64>, VEX_W; defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, - int_x86_bmi_blsi_32, loadi32>; + X86blsi_flag, loadi32>; defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, - int_x86_bmi_blsi_64, loadi64>, VEX_W; + X86blsi_flag, loadi64>, VEX_W; } multiclass bmi_bextr_bzhi opc, string mnemonic, RegisterClass RC, diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll index 4b40d903380..69cf7365c50 100644 --- a/test/CodeGen/X86/bmi.ll +++ b/test/CodeGen/X86/bmi.ll @@ -89,59 +89,53 @@ define i64 @bzhi64(i64 %x, i64 %y) nounwind readnone { declare i64 @llvm.x86.bmi.bzhi.64(i64, i64) nounwind readnone define i32 @blsi32(i32 %x) nounwind readnone { - %tmp = tail call i32 @llvm.x86.bmi.blsi.32(i32 %x) - ret i32 %tmp + %tmp = sub i32 0, %x + %tmp2 = and i32 %x, %tmp + ret i32 %tmp2 ; CHECK: blsi32: ; CHECK: blsil } -declare i32 @llvm.x86.bmi.blsi.32(i32) nounwind readnone - define i64 @blsi64(i64 %x) nounwind readnone { - %tmp = tail call i64 @llvm.x86.bmi.blsi.64(i64 %x) - ret i64 %tmp + %tmp = sub i64 0, %x + %tmp2 = and i64 %tmp, %x + ret i64 %tmp2 ; CHECK: blsi64: ; CHECK: blsiq } -declare i64 @llvm.x86.bmi.blsi.64(i64) nounwind readnone - define i32 @blsmsk32(i32 %x) nounwind readnone { - %tmp = tail call i32 @llvm.x86.bmi.blsmsk.32(i32 %x) - ret i32 %tmp + %tmp = sub i32 %x, 1 + %tmp2 = xor i32 %x, %tmp + ret i32 %tmp2 ; CHECK: blsmsk32: ; CHECK: blsmskl } -declare i32 @llvm.x86.bmi.blsmsk.32(i32) nounwind readnone - define i64 @blsmsk64(i64 %x) nounwind readnone { - %tmp = tail call i64 @llvm.x86.bmi.blsmsk.64(i64 %x) - ret i64 %tmp + %tmp = sub i64 %x, 1 + %tmp2 = xor i64 %tmp, %x + ret i64 %tmp2 ; CHECK: blsmsk64: ; CHECK: blsmskq } -declare i64 @llvm.x86.bmi.blsmsk.64(i64) nounwind readnone - define i32 @blsr32(i32 %x) nounwind readnone { - %tmp = tail call i32 @llvm.x86.bmi.blsr.32(i32 %x) - ret 
i32 %tmp + %tmp = sub i32 %x, 1 + %tmp2 = and i32 %x, %tmp + ret i32 %tmp2 ; CHECK: blsr32: ; CHECK: blsrl } -declare i32 @llvm.x86.bmi.blsr.32(i32) nounwind readnone - define i64 @blsr64(i64 %x) nounwind readnone { - %tmp = tail call i64 @llvm.x86.bmi.blsr.64(i64 %x) - ret i64 %tmp + %tmp = sub i64 %x, 1 + %tmp2 = and i64 %tmp, %x + ret i64 %tmp2 ; CHECK: blsr64: ; CHECK: blsrq } -declare i64 @llvm.x86.bmi.blsr.64(i64) nounwind readnone - define i32 @pdep32(i32 %x, i32 %y) nounwind readnone { %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y) ret i32 %tmp -- 2.34.1