From: Manman Ren Date: Tue, 13 Nov 2012 19:13:05 +0000 (+0000) Subject: X86: when constructing VZEXT_LOAD from other loads, make sure its output X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=2adc503f291d69763c5fc59a8e35d318ee22b77a;p=oota-llvm.git X86: when constructing VZEXT_LOAD from other loads, make sure its output chain is correctly set up. As an example, if the original load must happen before later stores, we need to make sure the constructed VZEXT_LOAD is constrained to be before the stores. rdar://12684358 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167859 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b35fb514bf9..025970370c5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -14203,6 +14203,18 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, Ld->getAlignment(), false/*isVolatile*/, true/*ReadMem*/, false/*WriteMem*/); + + // Make sure the newly-created LOAD is in the same position as Ld in + // terms of dependency. We create a TokenFactor for Ld and ResNode, + // and update uses of Ld's output chain to use the TokenFactor. + if (Ld->hasAnyUseOfValue(1)) { + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + SDValue(Ld, 1), SDValue(ResNode.getNode(), 1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain); + DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1), + SDValue(ResNode.getNode(), 1)); + } + return DAG.getNode(ISD::BITCAST, dl, VT, ResNode); } } diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll index ec11654b355..904f048d1e9 100644 --- a/test/CodeGen/X86/avx-shuffle.ll +++ b/test/CodeGen/X86/avx-shuffle.ll @@ -246,3 +246,54 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind { ret <8 x float>%S } +; rdar://12684358 +; Make sure loads happen before stores. 
+; CHECK: swap8doubles
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
+; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
+; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
+; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
+; CHECK: vextractf128
+; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
+; CHECK: vextractf128
+; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
+; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
+define void @swap8doubles(double* nocapture %A, double* nocapture %C) nounwind uwtable ssp {
+entry:
+  %add.ptr = getelementptr inbounds double* %A, i64 2
+  %v.i = bitcast double* %A to <2 x double>*
+  %0 = load <2 x double>* %v.i, align 1 ; reads A[0..1]
+  %shuffle.i.i = shufflevector <2 x double> %0, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  %v1.i = bitcast double* %add.ptr to <2 x double>*
+  %1 = load <2 x double>* %v1.i, align 1 ; reads A[2..3]
+  %2 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i, <2 x double> %1, i8 1) nounwind
+  %add.ptr1 = getelementptr inbounds double* %A, i64 6
+  %add.ptr2 = getelementptr inbounds double* %A, i64 4
+  %v.i27 = bitcast double* %add.ptr2 to <2 x double>*
+  %3 = load <2 x double>* %v.i27, align 1 ; reads A[4..5]
+  %shuffle.i.i28 = shufflevector <2 x double> %3, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  %v1.i29 = bitcast double* %add.ptr1 to <2 x double>*
+  %4 = load <2 x double>* %v1.i29, align 1 ; reads A[6..7]
+  %5 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i28, <2 x double> %4, i8 1) nounwind
+  %6 = bitcast double* %C to <4 x double>*
+  %7 = load <4 x double>* %6, align 32 ; reads C[0..3]
+  %add.ptr5 = getelementptr inbounds double* %C, i64 4
+  %8 = bitcast double* %add.ptr5 to <4 x double>*
+  %9 = load <4 x double>* %8, align 32 ; reads C[4..7]
+  %shuffle.i26 = shufflevector <4 x double> %7, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+  %10 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %7, i8 1)
+  %shuffle.i = shufflevector <4 x double> %9, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+  %11 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %9, i8 1)
+  store <2 x double> %shuffle.i26, <2 x double>* %v.i, align 16 ; first write to A; every load from A above must already have been issued
+  store <2 x double> %10, <2 x double>* %v1.i, align 16
+  store <2 x double> %shuffle.i, <2 x double>* %v.i27, align 16
+  store <2 x double> %11, <2 x double>* %v1.i29, align 16
+  store <4 x double> %2, <4 x double>* %6, align 32 ; writes the values loaded from A into C
+  store <4 x double> %5, <4 x double>* %8, align 32
+  ret void
+}
+declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
+declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone