From: Jim Grosbach Date: Sat, 2 Mar 2013 20:16:24 +0000 (+0000) Subject: ARM: Creating a vector from a lane of another. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=7bf504c58fcf1345f0278d6dab3840a45a623965;p=oota-llvm.git ARM: Creating a vector from a lane of another. The VDUP instruction source register doesn't allow a non-constant lane index, so make sure we don't construct a ARM::VDUPLANE node asking it to do so. rdar://13328063 http://llvm.org/bugs/show_bug.cgi?id=13963 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176413 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 5d24e92f235..514971f01ee 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -4452,8 +4452,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // If we are VDUPing a value that comes directly from a vector, that will // cause an unnecessary move to and from a GPR, where instead we could - // just use VDUPLANE. - if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + // just use VDUPLANE. We can only do this if the lane being extracted + // is at a constant index, as the VDUP from lane instructions only have + // constant-index forms. + if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isa(Value->getOperand(1))) { // We need to create a new undef vector to use for the VDUPLANE if the // size of the vector from which we get the value is different than the // size of the vector that we need to create. We will insert the element diff --git a/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll new file mode 100644 index 00000000000..937ecc0d667 --- /dev/null +++ b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s + +define void @bar(<4 x i32>* %p, i32 %lane, <4 x i32> %phitmp) nounwind { +; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]:128] +; CHECK: add.w r[[ADDR:[0-9]+]], r[[SOURCE]], {{r[0-9]+}}, lsl #2 +; CHECK: vld1.32 {[[DREG:d[0-9]+]][], [[DREG2:d[0-9]+]][]}, [r[[ADDR]]:32] +; CHECK: vst1.32 {[[DREG]], [[DREG2]]}, [r0] + %val = extractelement <4 x i32> %phitmp, i32 %lane + %r1 = insertelement <4 x i32> undef, i32 %val, i32 1 + %r2 = insertelement <4 x i32> %r1, i32 %val, i32 2 + %r3 = insertelement <4 x i32> %r2, i32 %val, i32 3 + store <4 x i32> %r3, <4 x i32>* %p, align 4 + ret void +}