From: Louis Gerbarg Date: Thu, 17 Apr 2014 20:51:50 +0000 (+0000) Subject: Improve ARM64 vector creation X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=5540570374a374e702bce1c2fa4208ebe9433d74;p=oota-llvm.git Improve ARM64 vector creation This patch improves the performance of vector creation in cases where several of the lanes in the vector are a constant floating point value. It also includes new patterns to fold together some of the instructions when the value is 0.0f. Test cases included. rdar://16349427 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206496 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp index 1bb2198c1e3..d8ff81104ac 100644 --- a/lib/Target/ARM64/ARM64ISelLowering.cpp +++ b/lib/Target/ARM64/ARM64ISelLowering.cpp @@ -4891,7 +4891,7 @@ FailedModImm: if (!isa(V) && !isa(V)) isConstant = false; - if (isa(V)) { + if (isa(V) || isa(V)) { ++NumConstantLanes; if (!ConstantValue.getNode()) ConstantValue = V; @@ -4955,7 +4955,7 @@ FailedModImm: for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); SDValue LaneIdx = DAG.getConstant(i, MVT::i64); - if (!isa(V)) { + if (!isa(V) && !isa(V)) { // Note that type legalization likely mucked about with the VT of the // source operand, so we may have to convert it here before inserting. 
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx); diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td index 5bb57c5092b..ce15789b548 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.td +++ b/lib/Target/ARM64/ARM64InstrInfo.td @@ -3472,6 +3472,9 @@ def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; +def : Pat<(v2f64 (ARM64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>; +def : Pat<(v4f32 (ARM64dup (f32 fpimm0))), (MOVIv2d_ns (i32 0))>; + // EDIT per word & halfword: 2s, 4h, 4s, & 8h defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; def : Pat<(v2i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), diff --git a/test/CodeGen/ARM64/vector-insertion.ll b/test/CodeGen/ARM64/vector-insertion.ll new file mode 100644 index 00000000000..c9ca74929e9 --- /dev/null +++ b/test/CodeGen/ARM64/vector-insertion.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=arm64 -mcpu=generic < %s | FileCheck %s + +define void @test0f(float* nocapture %x, float %a) #0 { +entry: + %0 = insertelement <4 x float> , float %a, i32 0 + %1 = bitcast float* %x to <4 x float>* + store <4 x float> %0, <4 x float>* %1, align 16 + ret void + + ; CHECK-LABEL: test0f + ; CHECK: movi.2d v[[TEMP:[0-9]+]], #0000000000000000 + ; CHECK: ins.s v[[TEMP]][0], v{{[0-9]+}}[0] + ; CHECK: str q[[TEMP]], [x0] + ; CHECK: ret + + +} + + +define void @test1f(float* nocapture %x, float %a) #0 { +entry: + %0 = insertelement <4 x float> , float %a, i32 0 + %1 = bitcast float* %x to <4 x float>* + store <4 x float> %0, <4 x float>* %1, align 16 + ret void + + ; CHECK-LABEL: test1f + ; CHECK: fmov s[[TEMP:[0-9]+]], #1.000000e+00 + ; CHECK: dup.4s v[[TEMP2:[0-9]+]], v[[TEMP]][0] + ; CHECK: ins.s v[[TEMP2]][0], v0[0] + ; CHECK: str q[[TEMP2]], [x0] + ; CHECK: ret +}