From fee6969463d455201a5179620ccd8c3171effaa6 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 1 May 2013 19:53:30 +0000 Subject: [PATCH] SROA: Generate selects instead of shuffles when blending values because this is the cannonical form. Shuffles are more difficult to lower and we usually don't touch them, while we do optimize selects more often. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180875 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SROA.cpp | 14 +++++------- test/Transforms/SROA/vector-promotion.ll | 28 ++++++++++++------------ 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index c383e2f8ed8..d073e789dcb 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -2322,17 +2322,15 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), ConstantVector::get(Mask), Name + ".expand"); - DEBUG(dbgs() << " shuffle1: " << *V << "\n"); + DEBUG(dbgs() << " shuffle: " << *V << "\n"); Mask.clear(); for (unsigned i = 0; i != VecTy->getNumElements(); ++i) - if (i >= BeginIndex && i < EndIndex) - Mask.push_back(IRB.getInt32(i)); - else - Mask.push_back(IRB.getInt32(i + VecTy->getNumElements())); - V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask), - Name + "insert"); - DEBUG(dbgs() << " shuffle2: " << *V << "\n"); + Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex)); + + V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend"); + + DEBUG(dbgs() << " blend: " << *V << "\n"); return V; } diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll index 02f6d040cc9..3336515770a 100644 --- a/test/Transforms/SROA/vector-promotion.ll +++ b/test/Transforms/SROA/vector-promotion.ll @@ -224,26 +224,26 @@ entry: %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>* store <2 x i32> , <2 x i32>* %a.cast0 ; CHECK-NOT: store -; CHECK: %[[insert1:.*]] = shufflevector <4 x i32> , <4 x i32> undef, <4 x i32> +; CHECK: select <4 x i1> %a.gep1 = getelementptr <4 x i32>* %a, i32 0, i32 1 %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>* store <2 x i32> , <2 x i32>* %a.cast1 -; CHECK-NEXT: %[[insert2:.*]] = shufflevector <4 x i32> , <4 x i32> %[[insert1]], <4 x i32> +; CHECK-NEXT: select <4 x i1> %a.gep2 = getelementptr <4 x i32>* %a, i32 0, i32 2 %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>* store <2 x i32> , <2 x i32>* %a.cast2 -; CHECK-NEXT: %[[insert3:.*]] = shufflevector <4 x i32> , <4 x i32> %[[insert2]], <4 x i32> +; CHECK-NEXT: select <4 x i1> %a.gep3 = getelementptr <4 x i32>* %a, i32 0, i32 3 store i32 3, i32* %a.gep3 -; CHECK-NEXT: %[[insert4:.*]] = insertelement <4 x i32> %[[insert3]], i32 3, i32 3 +; CHECK-NEXT: insertelement <4 x i32> %ret = load <4 x i32>* %a ret <4 x i32> %ret -; CHECK-NEXT: ret <4 x i32> %[[insert4]] +; CHECK-NEXT: ret <4 x i32> } define <4 x i32> @test_subvec_load() { @@ -291,27 +291,27 @@ entry: %a.cast0 = bitcast float* %a.gep0 to i8* call void @llvm.memset.p0i8.i32(i8* %a.cast0, i8 0, i32 8, i32 0, i1 false) ; CHECK-NOT: store -; CHECK: %[[insert1:.*]] = shufflevector <4 x float> , <4 x float> undef, <4 x i32> +; CHECK: select <4 x i1> %a.gep1 = getelementptr <4 x float>* %a, i32 0, i32 1 %a.cast1 = bitcast float* %a.gep1 to i8* call void @llvm.memset.p0i8.i32(i8* %a.cast1, i8 1, i32 8, i32 0, i1 false) -; CHECK-NEXT: %[[insert2:.*]] = shufflevector <4 x float> , <4 x float> %[[insert1]], <4 x i32> +; CHECK-NEXT: select <4 x i1> %a.gep2 = getelementptr <4 x float>* %a, i32 0, i32 2 %a.cast2 = bitcast float* %a.gep2 to i8* call void @llvm.memset.p0i8.i32(i8* %a.cast2, i8 3, i32 8, i32 0, i1 false) -; CHECK-NEXT: %[[insert3:.*]] = shufflevector <4 x float> , <4 x float> %[[insert2]], <4 x i32> +; CHECK-NEXT: select <4 x i1> %a.gep3 = getelementptr <4 x float>* %a, i32 0, i32 3 %a.cast3 = bitcast float* %a.gep3 to i8* call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i32 0, i1 false) -; CHECK-NEXT: %[[insert4:.*]] = insertelement <4 x float> %[[insert3]], float 0x38E0E0E0E0000000, i32 3 +; CHECK-NEXT: insertelement <4 x float> %ret = load <4 x float>* %a ret <4 x float> %ret -; CHECK-NEXT: ret <4 x float> %[[insert4]] +; CHECK-NEXT: ret <4 x float> } define <4 x float> @test_subvec_memcpy(i8* %x, i8* %y, i8* %z, i8* %f, i8* %out) { @@ -326,7 +326,7 @@ entry: ; CHECK: %[[xptr:.*]] = bitcast i8* %x to <2 x float>* ; CHECK-NEXT: %[[x:.*]] = load <2 x float>* %[[xptr]] ; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> undef, <4 x i32> -; CHECK-NEXT: %[[insert_x:.*]] = shufflevector <4 x float> %[[expand_x]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: select <4 x i1> %a.gep1 = getelementptr <4 x float>* %a, i32 0, i32 1 %a.cast1 = bitcast float* %a.gep1 to i8* @@ -334,7 +334,7 @@ entry: ; CHECK-NEXT: %[[yptr:.*]] = bitcast i8* %y to <2 x float>* ; CHECK-NEXT: %[[y:.*]] = load <2 x float>* %[[yptr]] ; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> undef, <4 x i32> -; CHECK-NEXT: %[[insert_y:.*]] = shufflevector <4 x float> %[[expand_y]], <4 x float> %[[insert_x]], <4 x i32> +; CHECK-NEXT: select <4 x i1> %a.gep2 = getelementptr <4 x float>* %a, i32 0, i32 2 %a.cast2 = bitcast float* %a.gep2 to i8* @@ -342,14 +342,14 @@ entry: ; CHECK-NEXT: %[[zptr:.*]] = bitcast i8* %z to <2 x float>* ; CHECK-NEXT: %[[z:.*]] = load <2 x float>* %[[zptr]] ; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> undef, <4 x i32> -; CHECK-NEXT: %[[insert_z:.*]] = shufflevector <4 x float> %[[expand_z]], <4 x float> %[[insert_y]], <4 x i32> +; CHECK-NEXT: select <4 x i1> %a.gep3 = getelementptr <4 x float>* %a, i32 0, i32 3 %a.cast3 = bitcast float* %a.gep3 to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i32 0, i1 false) ; CHECK-NEXT: %[[fptr:.*]] = bitcast i8* %f to float* ; CHECK-NEXT: %[[f:.*]] = load float* %[[fptr]] -; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float> %[[insert_z]], float %[[f]], i32 3 +; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float> call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i32 0, i1 false) ; CHECK-NEXT: %[[outptr:.*]] = bitcast i8* %out to <2 x float>* -- 2.34.1