From: Ahmed Bougacha Date: Tue, 1 Sep 2015 21:56:00 +0000 (+0000) Subject: [ARM] Don't abort on variable-idx extractelt in ReconstructShuffle. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=fc98f116be9f160797b68c5207a6ddbe713e7195;p=oota-llvm.git [ARM] Don't abort on variable-idx extractelt in ReconstructShuffle. The code introduced in r244314 assumed that EXTRACT_VECTOR_ELT only takes constant indices, but it does accept variables. Bail out for those: we can't use them, as the shuffles we want to reconstruct do require constant masks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246594 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 17c48287f05..f2915a4ab15 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -5554,6 +5554,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // A shuffle can only come from building a vector from various // elements of other vectors. return SDValue(); + } else if (!isa<ConstantSDNode>(V.getOperand(1))) { + // Furthermore, shuffles require a constant mask, whereas extractelts + // accept variable indices. + return SDValue(); } // Add this element source to the list if it's not already there. diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll index 36eebbfc465..25c4807d986 100644 --- a/test/CodeGen/ARM/vdup.ll +++ b/test/CodeGen/ARM/vdup.ll @@ -364,3 +364,19 @@ define <4 x float> @check_spr_splat4_lane1(<4 x float> %p, i16 %q) { %sub = fsub <4 x float> %splat.splat, %p ret <4 x float> %sub } + +; Also make sure we don't barf on variable-index extractelts, where we almost +; could have generated a vdup. 
+ +define <8 x i8> @check_i8_varidx(<16 x i8> %v, i32 %idx) { +; CHECK-LABEL: check_i8_varidx: +; CHECK: mov r[[FP:[0-9]+]], sp +; CHECK: ldr r[[IDX:[0-9]+]], [r[[FP]], #4] +; CHECK: mov r[[SPCOPY:[0-9]+]], sp +; CHECK: vst1.64 {d{{.*}}, d{{.*}}}, [r[[SPCOPY]]:128], r[[IDX]] +; CHECK: vld1.8 {d{{.*}}[]}, [r[[SPCOPY]]] + %x = extractelement <16 x i8> %v, i32 %idx + %1 = insertelement <8 x i8> undef, i8 %x, i32 0 + %2 = insertelement <8 x i8> %1, i8 %x, i32 1 + ret <8 x i8> %2 +}