From 86ccb922275baa98620d96673071144a0f7d71ec Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 29 Nov 2015 16:41:04 +0000 Subject: [PATCH] [X86][SSE] Added support for lowering to ADDSUBPS/ADDSUBPD with commuted inputs We could already recognise shuffle(FSUB, FADD) -> ADDSUB, this allows us to recognise shuffle(FADD, FSUB) -> ADDSUB by commuting the shuffle mask prior to matching. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254259 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 15 ++++--- test/CodeGen/X86/sse3-avx-addsub.ll | 70 +++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 5 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 29742df84c3..3904d273c7d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -23141,14 +23141,19 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) { return SDValue(); auto *SVN = cast(N); - ArrayRef Mask = SVN->getMask(); + SmallVector Mask; + for (int M : SVN->getMask()) + Mask.push_back(M); + SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); - // We require the first shuffle operand to be the SUB node, and the second to - // be the ADD node. - // FIXME: We should support the commuted patterns. - if (V1->getOpcode() != ISD::FSUB || V2->getOpcode() != ISD::FADD) + // We require the first shuffle operand to be the FSUB node, and the second to + // be the FADD node. + if (V1.getOpcode() == ISD::FADD && V2.getOpcode() == ISD::FSUB) { + ShuffleVectorSDNode::commuteMask(Mask); + std::swap(V1, V2); + } else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD) return SDValue(); // If there are other uses of these operations we can't fold them. 
diff --git a/test/CodeGen/X86/sse3-avx-addsub.ll b/test/CodeGen/X86/sse3-avx-addsub.ll index a3324ad6264..8665edf8f1d 100644 --- a/test/CodeGen/X86/sse3-avx-addsub.ll +++ b/test/CodeGen/X86/sse3-avx-addsub.ll @@ -170,3 +170,73 @@ define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) { %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> ret <2 x double> %vecinit2 } + +define <4 x float> @test1c(<4 x float> %A, <4 x float>* %B) { +; SSE-LABEL: test1c: +; SSE: # BB#0: +; SSE-NEXT: addsubps (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test1c: +; AVX: # BB#0: +; AVX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = load <4 x float>, <4 x float>* %B + %add = fadd <4 x float> %A, %1 + %sub = fsub <4 x float> %A, %1 + %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> + ret <4 x float> %vecinit6 +} + +define <8 x float> @test2c(<8 x float> %A, <8 x float>* %B) { +; SSE-LABEL: test2c: +; SSE: # BB#0: +; SSE-NEXT: addsubps (%rdi), %xmm0 +; SSE-NEXT: addsubps 16(%rdi), %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: test2c: +; AVX: # BB#0: +; AVX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 +; AVX-NEXT: retq + %1 = load <8 x float>, <8 x float>* %B + %add = fadd <8 x float> %A, %1 + %sub = fsub <8 x float> %A, %1 + %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> + ret <8 x float> %vecinit14 +} + +define <4 x double> @test3c(<4 x double> %A, <4 x double>* %B) { +; SSE-LABEL: test3c: +; SSE: # BB#0: +; SSE-NEXT: addsubpd (%rdi), %xmm0 +; SSE-NEXT: addsubpd 16(%rdi), %xmm1 +; SSE-NEXT: retq +; +; AVX-LABEL: test3c: +; AVX: # BB#0: +; AVX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 +; AVX-NEXT: retq + %1 = load <4 x double>, <4 x double>* %B + %add = fadd <4 x double> %A, %1 + %sub = fsub <4 x double> %A, %1 + %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> + ret <4 x double> %vecinit6 +} + +define <2 x double> @test4c(<2 x double> %A, <2 x double>* %B) { +; SSE-LABEL: test4c: +; SSE: 
# BB#0: +; SSE-NEXT: addsubpd (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test4c: +; AVX: # BB#0: +; AVX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 +; AVX-NEXT: retq + %1 = load <2 x double>, <2 x double>* %B + %sub = fsub <2 x double> %A, %1 + %add = fadd <2 x double> %A, %1 + %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> + ret <2 x double> %vecinit2 +} -- 2.34.1