From c24cb3551ed66830b53362f593269873cb53a0c4 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Mon, 8 Nov 2010 23:21:22 +0000 Subject: [PATCH] Add support for ARM's specialized vector-compare-against-zero instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118453 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 33 +++++++++++++++++- lib/Target/ARM/ARMISelLowering.h | 5 +++ lib/Target/ARM/ARMInstrNEON.td | 54 +++++++++++++++++------------- test/CodeGen/ARM/vceq.ll | 11 ++++++ test/CodeGen/ARM/vcge.ll | 22 ++++++++++++ test/CodeGen/ARM/vcgt.ll | 22 ++++++++++++ test/MC/ARM/neon-cmp-encoding.s | 11 ++++++ 7 files changed, 134 insertions(+), 24 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index fa3170cc2e8..6577a246e1b 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3074,7 +3074,38 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { if (Swap) std::swap(Op0, Op1); - SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); + // If one of the operands is a constant vector zero, attempt to fold the + // comparison to a specialized compare-against-zero form. 
+ SDValue SingleOp; + if (ISD::isBuildVectorAllZeros(Op1.getNode())) + SingleOp = Op0; + else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { + if (Opc == ARMISD::VCGE) + Opc = ARMISD::VCLEZ; + else if (Opc == ARMISD::VCGT) + Opc = ARMISD::VCLTZ; + SingleOp = Op1; + } + + SDValue Result; + if (SingleOp.getNode()) { + switch (Opc) { + case ARMISD::VCEQ: + Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; + case ARMISD::VCGE: + Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; + case ARMISD::VCLEZ: + Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; + case ARMISD::VCGT: + Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; + case ARMISD::VCLTZ: + Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; + default: + Result = DAG.getNode(Opc, dl, VT, Op0, Op1); + } + } else { + Result = DAG.getNode(Opc, dl, VT, Op0, Op1); + } if (Invert) Result = DAG.getNOT(dl, Result, VT); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index dd2a1ad14fd..4a4b83db0f5 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -87,9 +87,14 @@ namespace llvm { PRELOAD, // Preload VCEQ, // Vector compare equal. + VCEQZ, // Vector compare equal to zero. VCGE, // Vector compare greater than or equal. + VCGEZ, // Vector compare greater than or equal to zero. + VCLEZ, // Vector compare less than or equal to zero. VCGEU, // Vector compare unsigned greater than or equal. VCGT, // Vector compare greater than. + VCGTZ, // Vector compare greater than zero. + VCLTZ, // Vector compare less than zero. VCGTU, // Vector compare unsigned greater than. VTST, // Vector test bits. 
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index e64fefc6ca3..d3e83f42b1c 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -16,11 +16,17 @@ //===----------------------------------------------------------------------===// def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; +def SDTARMVCMPZ : SDTypeProfile<1, 1, []>; def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>; +def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>; def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>; +def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>; +def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>; def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>; def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>; +def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>; +def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>; def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>; def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>; @@ -2150,36 +2156,44 @@ class N2VCvtQ op11_8, bit op7, bit op4, // First with only element sizes of 8, 16 and 32 bits: multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, string opc, string Dt, - string asm> { + string asm, SDNode OpNode> { // 64-bit vector types. def v8i8 : N2V; + opc, !strconcat(Dt, "8"), asm, "", + [(set DPR:$dst, (v8i8 (OpNode (v8i8 DPR:$src))))]>; def v4i16 : N2V; + opc, !strconcat(Dt, "16"), asm, "", + [(set DPR:$dst, (v4i16 (OpNode (v4i16 DPR:$src))))]>; def v2i32 : N2V; + opc, !strconcat(Dt, "32"), asm, "", + [(set DPR:$dst, (v2i32 (OpNode (v2i32 DPR:$src))))]>; def v2f32 : N2V { + opc, "f32", asm, "", + [(set DPR:$dst, (v2i32 (OpNode (v2f32 DPR:$src))))]> { let Inst{10} = 1; // overwrite F = 1 } // 128-bit vector types. 
def v16i8 : N2V; + opc, !strconcat(Dt, "8"), asm, "", + [(set QPR:$dst, (v16i8 (OpNode (v16i8 QPR:$src))))]>; def v8i16 : N2V; + opc, !strconcat(Dt, "16"), asm, "", + [(set QPR:$dst, (v8i16 (OpNode (v8i16 QPR:$src))))]>; def v4i32 : N2V; + opc, !strconcat(Dt, "32"), asm, "", + [(set QPR:$dst, (v4i32 (OpNode (v4i32 QPR:$src))))]>; def v4f32 : N2V { + opc, "f32", asm, "", + [(set QPR:$dst, (v4i32 (OpNode (v4f32 QPR:$src))))]> { let Inst{10} = 1; // overwrite F = 1 } } @@ -3220,9 +3234,9 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, NEONvceq, 1>; def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, NEONvceq, 1>; -// For disassembly only. + defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", - "$dst, $src, #0">; + "$dst, $src, #0", NEONvceqz>; // VCGE : Vector Compare Greater Than or Equal defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, @@ -3233,14 +3247,11 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, NEONvcge, 0>; def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, NEONvcge, 0>; -// For disassembly only. -// FIXME: This instruction's encoding MAY NOT BE correct. + defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", - "$dst, $src, #0">; -// For disassembly only. -// FIXME: This instruction's encoding MAY NOT BE correct. + "$dst, $src, #0", NEONvcgez>; defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", - "$dst, $src, #0">; + "$dst, $src, #0", NEONvclez>; // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, @@ -3251,14 +3262,11 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>; def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>; -// For disassembly only. -// FIXME: This instruction's encoding MAY NOT BE correct. 
+ defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", - "$dst, $src, #0">; -// For disassembly only. -// FIXME: This instruction's encoding MAY NOT BE correct. + "$dst, $src, #0", NEONvcgtz>; defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", - "$dst, $src, #0">; + "$dst, $src, #0", NEONvcltz>; // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", diff --git a/test/CodeGen/ARM/vceq.ll b/test/CodeGen/ARM/vceq.ll index e4787518e73..051c349a06a 100644 --- a/test/CodeGen/ARM/vceq.ll +++ b/test/CodeGen/ARM/vceq.ll @@ -79,3 +79,14 @@ define <4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind { %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } + +define <8 x i8> @vceqi8Z(<8 x i8>* %A) nounwind { +;CHECK: vceqi8Z: +;CHECK-NOT: vmov +;CHECK-NOT: vmvn +;CHECK: vceq.i8 + %tmp1 = load <8 x i8>* %A + %tmp3 = icmp eq <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +} diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll index 2c161113c11..f190931f1bf 100644 --- a/test/CodeGen/ARM/vcge.ll +++ b/test/CodeGen/ARM/vcge.ll @@ -160,3 +160,25 @@ define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind { declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone + +define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind { +;CHECK: vcgei8Z: +;CHECK-NOT: vmov +;CHECK-NOT: vmvn +;CHECK: vcge.s8 + %tmp1 = load <8 x i8>* %A + %tmp3 = icmp sge <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +} + +define <8 x i8> @vclei8Z(<8 x i8>* %A) nounwind { +;CHECK: vclei8Z: +;CHECK-NOT: vmov +;CHECK-NOT: vmvn +;CHECK: vcle.s8 + %tmp1 = load <8 x i8>* %A + %tmp3 = icmp sle <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +} diff --git 
a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll index e3318cd2069..7663da3c612 100644 --- a/test/CodeGen/ARM/vcgt.ll +++ b/test/CodeGen/ARM/vcgt.ll @@ -173,3 +173,25 @@ define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind { declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone + +define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind { +;CHECK: vcgti8Z: +;CHECK-NOT: vmov +;CHECK-NOT: vmvn +;CHECK: vcgt.s8 + %tmp1 = load <8 x i8>* %A + %tmp3 = icmp sgt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +} + +define <8 x i8> @vclti8Z(<8 x i8>* %A) nounwind { +;CHECK: vclti8Z: +;CHECK-NOT: vmov +;CHECK-NOT: vmvn +;CHECK: vclt.s8 + %tmp1 = load <8 x i8>* %A + %tmp3 = icmp slt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> + %tmp4 = sext <8 x i1> %tmp3 to <8 x i8> + ret <8 x i8> %tmp4 +} diff --git a/test/MC/ARM/neon-cmp-encoding.s b/test/MC/ARM/neon-cmp-encoding.s index 27ebc8a9363..da59ee7209c 100644 --- a/test/MC/ARM/neon-cmp-encoding.s +++ b/test/MC/ARM/neon-cmp-encoding.s @@ -102,3 +102,14 @@ vtst.16 q8, q8, q9 @ CHECK: vtst.32 q8, q8, q9 @ encoding: [0xf2,0x08,0x60,0xf2] vtst.32 q8, q8, q9 + +@ CHECK: vceq.i8 d16, d16, #0 @ encoding: [0x20,0x01,0xf1,0xf3] + vceq.i8 d16, d16, #0 +@ CHECK: vcge.s8 d16, d16, #0 @ encoding: [0xa0,0x00,0xf1,0xf3] + vcge.s8 d16, d16, #0 +@ CHECK: vcle.s8 d16, d16, #0 @ encoding: [0xa0,0x01,0xf1,0xf3] + vcle.s8 d16, d16, #0 +@ CHECK: vcgt.s8 d16, d16, #0 @ encoding: [0x20,0x00,0xf1,0xf3] + vcgt.s8 d16, d16, #0 +@ CHECK: vclt.s8 d16, d16, #0 @ encoding: [0x20,0x02,0xf1,0xf3] + vclt.s8 d16, d16, #0 -- 2.34.1