unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
uint64_t AllOnesEltMask = ~0ULL >> (64-VWidth);
if (VWidth <= 64 &&
- SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts))
+ SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
+ LHS = SVI.getOperand(0);
+ RHS = SVI.getOperand(1);
MadeChange = true;
+ }
// Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
// Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
--- /dev/null
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {insertelement <4 x float> undef}
+
+; Instcombine should be able to prove that none of the
+; insertelement's first operand's elements are needed.
+
+; Unnamed internal function taking one i8* argument (%0).
+; It loads two i32 values from %0 at byte offsets 0 and 4, then loops with a
+; counter that starts at the first value, advances by 2 per iteration, and
+; continues while it is signed-less-than the second value. Each iteration
+; converts the counter to float, inserts it into lane 0 of the loop-carried
+; vector, broadcasts lane 0 to all four lanes, and stores the result at byte
+; offset 48 from %0.
+define internal void @""(i8*) {
+; <label>:1
+ bitcast i8* %0 to i32* ; <i32*>:2 [#uses=1]
+ load i32* %2, align 1 ; <i32>:3 [#uses=1]
+ getelementptr i8* %0, i32 4 ; <i8*>:4 [#uses=1]
+ bitcast i8* %4 to i32* ; <i32*>:5 [#uses=1]
+ load i32* %5, align 1 ; <i32>:6 [#uses=1]
+ br label %7
+
+; <label>:7 ; preds = %9, %1
+ ; %.01 carries the previous iteration's broadcast result (undef on entry);
+ ; %.0 is the loop counter.
+ %.01 = phi <4 x float> [ undef, %1 ], [ %12, %9 ] ; <<4 x float>> [#uses=1]
+ %.0 = phi i32 [ %3, %1 ], [ %15, %9 ] ; <i32> [#uses=3]
+ icmp slt i32 %.0, %6 ; <i1>:8 [#uses=1]
+ br i1 %8, label %9, label %16
+
+; <label>:9 ; preds = %7
+ sitofp i32 %.0 to float ; <float>:10 [#uses=1]
+ ; Lane 0 of %.01 is overwritten here, and the all-zero mask below makes every
+ ; result lane select element 0 of %11, so no lane of %.01 reaches %12.
+ insertelement <4 x float> %.01, float %10, i32 0 ; <<4 x float>>:11 [#uses=1]
+ shufflevector <4 x float> %11, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:12 [#uses=2]
+ getelementptr i8* %0, i32 48 ; <i8*>:13 [#uses=1]
+ bitcast i8* %13 to <4 x float>* ; <<4 x float>*>:14 [#uses=1]
+ store <4 x float> %12, <4 x float>* %14, align 16
+ add i32 %.0, 2 ; <i32>:15 [#uses=1]
+ br label %7
+
+; <label>:16 ; preds = %7
+ ret void
+}
--- /dev/null
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep shufflevector
+; PR2645
+
+; instcombine shouldn't delete the shufflevector.
+
+; Unnamed internal function taking (i8* %0, i32 %1, i8* %2).
+; It loops with a counter from 0, incremented by 1, while the counter is
+; signed-less-than %1. Each iteration loads a <4 x i16> from %2 + counter,
+; passes it (bitcast to <1 x i64>) to @foo, reinterprets the <2 x i64> result
+; as <8 x i16>, duplicates each of its first four elements with a shuffle,
+; converts the result (viewed as <4 x i32>) to <4 x float>, and stores it at
+; %0 + counter. After the loop it calls @llvm.x86.mmx.emms and returns.
+define internal void @""(i8*, i32, i8*) {
+; <label>:3
+ br label %4
+
+; <label>:4 ; preds = %6, %3
+ %.0 = phi i32 [ 0, %3 ], [ %19, %6 ] ; <i32> [#uses=4]
+ %5 = icmp slt i32 %.0, %1 ; <i1> [#uses=1]
+ br i1 %5, label %6, label %20
+
+; <label>:6 ; preds = %4
+ %7 = getelementptr i8* %2, i32 %.0 ; <i8*> [#uses=1]
+ %8 = bitcast i8* %7 to <4 x i16>* ; <<4 x i16>*> [#uses=1]
+ %9 = load <4 x i16>* %8, align 1 ; <<4 x i16>> [#uses=1]
+ %10 = bitcast <4 x i16> %9 to <1 x i64> ; <<1 x i64>> [#uses=1]
+ %11 = call <2 x i64> @foo(<1 x i64> %10)
+; <<2 x i64>> [#uses=1]
+ %12 = bitcast <2 x i64> %11 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %13 = bitcast <4 x i32> %12 to <8 x i16> ; <<8 x i16>> [#uses=2]
+ ; Both shuffle operands are the same value (%13); the mask 0,0,1,1,2,2,3,3
+ ; reads only elements 0-3 of the first operand, each one twice.
+ ; NOTE(review): presumably this is the shuffle(x, x, mask) shape that
+ ; triggered PR2645 -- confirm against the bug report.
+ %14 = shufflevector <8 x i16> %13, <8 x i16> %13, <8 x i32> < i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3 > ; <<8 x i16>> [#uses=1]
+ %15 = bitcast <8 x i16> %14 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %16 = sitofp <4 x i32> %15 to <4 x float> ; <<4 x float>> [#uses=1]
+ %17 = getelementptr i8* %0, i32 %.0 ; <i8*> [#uses=1]
+ %18 = bitcast i8* %17 to <4 x float>* ; <<4 x float>*> [#uses=1]
+ store <4 x float> %16, <4 x float>* %18, align 1
+ %19 = add i32 %.0, 1 ; <i32> [#uses=1]
+ br label %4
+
+; <label>:20 ; preds = %4
+ call void @llvm.x86.mmx.emms( )
+ ret void
+}
+
+declare <2 x i64> @foo(<1 x i64>)
+declare void @llvm.x86.mmx.emms( )
+++ /dev/null
-; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {insertelement <4 x float> undef}
-
-; Instcombine should be able to prove that none of the
-; insertelement's first operand's elements are needed.
-
-define internal void @""(i8*) {
-; <label>:1
- bitcast i8* %0 to i32* ; <i32*>:2 [#uses=1]
- load i32* %2, align 1 ; <i32>:3 [#uses=1]
- getelementptr i8* %0, i32 4 ; <i8*>:4 [#uses=1]
- bitcast i8* %4 to i32* ; <i32*>:5 [#uses=1]
- load i32* %5, align 1 ; <i32>:6 [#uses=1]
- br label %7
-
-; <label>:7 ; preds = %9, %1
- %.01 = phi <4 x float> [ undef, %1 ], [ %12, %9 ] ; <<4 x float>> [#uses=1]
- %.0 = phi i32 [ %3, %1 ], [ %15, %9 ] ; <i32> [#uses=3]
- icmp slt i32 %.0, %6 ; <i1>:8 [#uses=1]
- br i1 %8, label %9, label %16
-
-; <label>:9 ; preds = %7
- sitofp i32 %.0 to float ; <float>:10 [#uses=1]
- insertelement <4 x float> %.01, float %10, i32 0 ; <<4 x float>>:11 [#uses=1]
- shufflevector <4 x float> %11, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:12 [#uses=2]
- getelementptr i8* %0, i32 48 ; <i8*>:13 [#uses=1]
- bitcast i8* %13 to <4 x float>* ; <<4 x float>*>:14 [#uses=1]
- store <4 x float> %12, <4 x float>* %14, align 16
- add i32 %.0, 2 ; <i32>:15 [#uses=1]
- br label %7
-
-; <label>:16 ; preds = %7
- ret void
-}