From fe6d2cd9d169894b0263df8c5f26df663f4f0ea8 Mon Sep 17 00:00:00 2001 From: Mon P Wang Date: Mon, 26 Jan 2009 04:39:00 +0000 Subject: [PATCH] Fixed optimization of combining two shuffles where the first shuffle's inputs have a different number of elements than the output. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62998 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Scalar/InstructionCombining.cpp | 4 +++- test/Transforms/InstCombine/vec_shuffle2.ll | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/InstCombine/vec_shuffle2.ll diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 40289eaf6bf..7f7592840a4 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -12179,9 +12179,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // If the result mask is equal to the src shuffle or this shuffle mask, do // the replacement. if (NewMask == LHSMask || NewMask == Mask) { + unsigned LHSInNElts = + cast(LHSSVI->getOperand(0)->getType())->getNumElements(); std::vector Elts; for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { - if (NewMask[i] >= e*2) { + if (NewMask[i] >= LHSInNElts*2) { Elts.push_back(UndefValue::get(Type::Int32Ty)); } else { Elts.push_back(ConstantInt::get(Type::Int32Ty, NewMask[i])); diff --git a/test/Transforms/InstCombine/vec_shuffle2.ll b/test/Transforms/InstCombine/vec_shuffle2.ll new file mode 100644 index 00000000000..3bd8924903f --- /dev/null +++ b/test/Transforms/InstCombine/vec_shuffle2.ll @@ -0,0 +1,19 @@ +; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep undef | count 1 +; END. + +; Test fold of two shuffles where the first shuffle's vector inputs are a +; different length than the second. 
+ +define void @test_cl(<4 x i8> addrspace(1)* %dest, <16 x i8> addrspace(1)* %old) nounwind { +entry: + %arrayidx = getelementptr <4 x i8> addrspace(1)* %dest, i32 0 ; <<4 x i8> addrspace(1)*> [#uses=1] + %arrayidx5 = getelementptr <16 x i8> addrspace(1)* %old, i32 0 ; <<16 x i8> addrspace(1)*> [#uses=1] + %tmp6 = load <16 x i8> addrspace(1)* %arrayidx5 ; <<16 x i8>> [#uses=1] + %tmp7 = shufflevector <16 x i8> %tmp6, <16 x i8> undef, <4 x i32> < i32 13, i32 9, i32 4, i32 13 > ; <<4 x i8>> [#uses=1] + %tmp9 = shufflevector <4 x i8> %tmp7, <4 x i8> undef, <4 x i32> < i32 3, i32 1, i32 2, i32 0 > ; <<4 x i8>> [#uses=1] + store <4 x i8> %tmp9, <4 x i8> addrspace(1)* %arrayidx + ret void + +return: ; preds = %entry + ret void +} \ No newline at end of file -- 2.34.1