From 0a765136e66c72541f2e7d29fb1ae5591b38d682 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 7 Jan 2016 15:01:46 +0000 Subject: [PATCH] Revert r257055, it caused PR26064. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257066 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 9 ++------- test/CodeGen/X86/2012-01-12-extract-sv.ll | 4 +++- test/CodeGen/X86/avx-vbroadcast.ll | 10 ++++++++-- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 06def2cacd8..d32146b32cf 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8163,11 +8163,6 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, break; } - // Peek through any bitcast (only useful for loads). - SDValue BC = V; - while (BC.getOpcode() == ISD::BITCAST) - BC = BC.getOperand(0); - // Check if this is a broadcast of a scalar. We special case lowering // for scalars so that we can more effectively fold with loads. // First, look through bitcast: if the original value has a larger element @@ -8187,10 +8182,10 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, // Only AVX2 has register broadcasts. if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V)) return SDValue(); - } else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) { + } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) { // If we are broadcasting a load that is only used by the shuffle // then we can reduce the vector load to the broadcasted scalar load. 
- LoadSDNode *Ld = cast<LoadSDNode>(BC); + LoadSDNode *Ld = cast<LoadSDNode>(V); SDValue BaseAddr = Ld->getOperand(1); EVT AddrVT = BaseAddr.getValueType(); EVT SVT = VT.getScalarType(); diff --git a/test/CodeGen/X86/2012-01-12-extract-sv.ll b/test/CodeGen/X86/2012-01-12-extract-sv.ll index 6950641a08a..92ec107a007 100644 --- a/test/CodeGen/X86/2012-01-12-extract-sv.ll +++ b/test/CodeGen/X86/2012-01-12-extract-sv.ll @@ -3,7 +3,9 @@ define void @endless_loop() { ; CHECK-LABEL: endless_loop: ; CHECK-NEXT: # BB#0: -; CHECK-NEXT: vbroadcastss (%eax), %ymm0 +; CHECK-NEXT: vmovaps (%eax), %ymm0 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0] ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll index 837333a0b1f..86b0628aa0b 100644 --- a/test/CodeGen/X86/avx-vbroadcast.ll +++ b/test/CodeGen/X86/avx-vbroadcast.ll @@ -130,7 +130,10 @@ entry: define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp { ; CHECK-LABEL: load_splat_8i32_8i32_55555555: ; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0 +; CHECK-NEXT: vmovaps (%rdi), %ymm0 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; CHECK-NEXT: retq entry: %ld = load <8 x i32>, <8 x i32>* %ptr @@ -198,7 +201,10 @@ entry: define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp { ; CHECK-LABEL: load_splat_4i64_4i64_2222: ; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0 +; CHECK-NEXT: vmovapd (%rdi), %ymm0 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; CHECK-NEXT: retq entry: %ld = load <4 x i64>, <4 x i64>* %ptr -- 
2.34.1