From: Benjamin Kramer Date: Thu, 11 Apr 2013 15:10:09 +0000 (+0000) Subject: Fix for wrong instcombine on vector insert/extract X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=c37cb66e6ee256bcb3ba138383e4cb9aab55ddb9;p=oota-llvm.git Fix for wrong instcombine on vector insert/extract When trying to collapse sequences of insertelement/extractelement instructions into single shuffle instructions, there is one specific case where the Instruction Combiner wrongly updates the resulting Mask of shuffle indexes. The problem is in function CollectShuffleElements. If we have a sequence of insert/extract element instructions like the one below: %tmp1 = extractelement <4 x float> %LHS, i32 0 %tmp2 = insertelement <4 x float> %RHS, float %tmp1, i32 1 %tmp3 = extractelement <4 x float> %RHS, i32 2 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 3 Where: . %RHS will have a mask of [4,5,6,7] . %LHS will have a mask of [0,1,2,3] The Mask of shuffle indexes is wrongly computed to [4,1,6,7] instead of [4,0,6,7]. When analyzing %tmp2 in order to compute the Mask for the resulting shuffle instruction, the algorithm forgets to update the mask index at position 1 with the index associated to the element extracted from %LHS by instruction %tmp1. Patch by Andrea DiBiagio! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179291 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 4f71db1a4b0..bbfad8693eb 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -336,6 +336,10 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl &Mask, if (VecOp == RHS) { Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS); + // Update Mask to reflect that `ScalarOp' has been inserted at + // position `InsertedIdx' within the vector returned by IEI. 
+ Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx]; + // Everything but the extracted element is replaced with the RHS. for (unsigned i = 0; i != NumElts; ++i) { if (i != InsertedIdx) diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll index 14f532195d7..37d4d56e913 100644 --- a/test/Transforms/InstCombine/vec_shuffle.ll +++ b/test/Transforms/InstCombine/vec_shuffle.ll @@ -196,3 +196,30 @@ define <4 x i16> @test13e(<4 x i16> %lhs, <4 x i16> %rhs) { <4 x i16> %lhs, <4 x i16> %rhs ret <4 x i16> %A } + +; Check that sequences of insert/extract element are +; collapsed into shuffle instruction with correct shuffle indexes. + +define <4 x float> @test14a(<4 x float> %LHS, <4 x float> %RHS) { +; CHECK: @test14a +; CHECK-NEXT: shufflevector <4 x float> %LHS, <4 x float> %RHS, <4 x i32> <i32 4, i32 0, i32 6, i32 7> +; CHECK-NEXT: ret <4 x float> %tmp4 + %tmp1 = extractelement <4 x float> %LHS, i32 0 + %tmp2 = insertelement <4 x float> %RHS, float %tmp1, i32 1 + %tmp3 = extractelement <4 x float> %RHS, i32 2 + %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 3 + ret <4 x float> %tmp4 +} + +define <4 x float> @test14b(<4 x float> %LHS, <4 x float> %RHS) { +; CHECK: @test14b +; CHECK-NEXT: shufflevector <4 x float> %LHS, <4 x float> %RHS, <4 x i32> <i32 4, i32 3, i32 6, i32 6> +; CHECK-NEXT: ret <4 x float> %tmp5 + %tmp0 = extractelement <4 x float> %LHS, i32 3 + %tmp1 = insertelement <4 x float> %RHS, float %tmp0, i32 0 + %tmp2 = extractelement <4 x float> %tmp1, i32 0 + %tmp3 = insertelement <4 x float> %RHS, float %tmp2, i32 1 + %tmp4 = extractelement <4 x float> %RHS, i32 2 + %tmp5 = insertelement <4 x float> %tmp3, float %tmp4, i32 3 + ret <4 x float> %tmp5 +}