From: Owen Anderson Date: Fri, 5 Nov 2010 19:27:46 +0000 (+0000) Subject: Add codegen and encoding support for the immediate form of vbic. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=080c09229739ec2b13f7bccc361994a8d26b4ed2;p=oota-llvm.git Add codegen and encoding support for the immediate form of vbic. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118291 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 5a3520e3a2b..31bde5fdb99 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -673,8 +673,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::MUL); - if (Subtarget->hasV6T2Ops()) + if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) setTargetDAGCombine(ISD::OR); + if (Subtarget->hasNEON()) + setTargetDAGCombine(ISD::AND); setStackPointerRegisterToSaveRestore(ARM::SP); @@ -4443,6 +4445,36 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); } +static SDValue PerformANDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // Attempt to use immediate-form VBIC + BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + SelectionDAG &DAG = DCI.DAG; + + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN && + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { + if (SplatBitSize <= 64) { + EVT VbicVT; + SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), + SplatUndef.getZExtValue(), SplatBitSize, + DAG, VbicVT, VT.is128BitVector(), false); + if (Val.getNode()) { + SDValue Input = + DAG.getNode(ISD::BIT_CONVERT, dl, VbicVT, N->getOperand(0)); + SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vbic); + } + } + } + + return SDValue(); +} + /// PerformORCombine - Target-specific dag combine xforms for ISD::OR static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, @@ -5066,6 +5098,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); + case ISD::AND: return PerformANDCombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI.DAG); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 4a7dec21583..4e74193fc57 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -165,7 +165,9 @@ namespace llvm { BFI, // Vector OR with immediate - VORRIMM + VORRIMM, + // Vector AND with NOT of immediate + VBICIMM }; } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 29a1f2c360a..e64fefc6ca3 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -72,6 +72,7 @@ def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; +def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>; def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; @@ -3308,13 +3309,13 @@ def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1, let Inst{9} = SIMM{9}; } -def VORRiv2i32 : N1ModImm<1, 0b000, {?,?,?,1}, 0, 0, 0, 1, +def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1, (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), IIC_VMOVImm, "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", [(set DPR:$Vd, (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { - let Inst{11-9} = SIMM{11-9}; + let Inst{10-9} = SIMM{10-9}; } def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1, @@ -3326,13 +3327,13 @@ def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1, let Inst{9} = SIMM{9}; } -def VORRiv4i32 : N1ModImm<1, 0b000, {?,?,?,1}, 0, 1, 0, 1, +def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1, (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), IIC_VMOVImm, "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", [(set QPR:$Vd, (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { - let Inst{11-9} = SIMM{11-9}; + let Inst{10-9} = SIMM{10-9}; } @@ -3348,6 +3349,42 @@ def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), [(set QPR:$dst, (v4i32 (and QPR:$src1, (vnotq QPR:$src2))))]>; +def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1, + (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + IIC_VMOVImm, + "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1, + (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + IIC_VMOVImm, + "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { + let Inst{10-9} = SIMM{10-9}; +} + +def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1, + (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + IIC_VMOVImm, + "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1, + (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + IIC_VMOVImm, + "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { + let Inst{10-9} = SIMM{10-9}; +} + // VORN : Vector Bitwise OR NOT def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD, diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll index b6077c05ec3..880495dd7fb 100644 --- a/test/CodeGen/ARM/vbits.ll +++ b/test/CodeGen/ARM/vbits.ll @@ -525,3 +525,23 @@ define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind { %tmp3 = or <16 x i8> %tmp1, ret <16 x i8> %tmp3 } + +define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind { +; CHECK: v_bicimm: +; CHECK-NOT: vmov +; CHECK-NOT vmvn +; CHECK: vbic + %tmp1 = load <8 x i8>* %A + %tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 > + ret <8 x i8> %tmp3 +} + +define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind { +; CHECK: v_bicimmQ: +; CHECK-NOT: vmov +; CHECK-NOT: vmvn +; CHECK: vbic + %tmp1 = load <16 x i8>* %A + %tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 > + ret <16 x i8> %tmp3 +} diff --git a/test/MC/ARM/neon-bitwise-encoding.s b/test/MC/ARM/neon-bitwise-encoding.s index 43a73ff1280..7a4c8fde515 100644 --- a/test/MC/ARM/neon-bitwise-encoding.s +++ b/test/MC/ARM/neon-bitwise-encoding.s @@ -26,6 +26,10 @@ vbic d16, d17, d16 @ CHECK: vbic q8, q8, q9 @ encoding: [0xf2,0x01,0x50,0xf2] vbic q8, q8, q9 +@ CHECK: vbic.i32 d16, #0xFF000000 @ encoding: [0x3f,0x07,0xc7,0xf3] + vbic.i32 d16, #0xFF000000 +@ CHECK: vbic.i32 q8, #0xFF000000 @ encoding: [0x7f,0x07,0xc7,0xf3] + vbic.i32 q8, #0xFF000000 @ CHECK: vorn d16, d17, d16 @ encoding: [0xb0,0x01,0x71,0xf2] vorn d16, d17, d16