From: Simon Pilgrim Date: Wed, 6 Jan 2016 09:08:49 +0000 (+0000) Subject: [X86][SSE] There is no zmm addsubpd/addsubps instruction. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=2d3ec5706a0a8e9de6fe9ed3dc87a0ba1a0942f0;p=oota-llvm.git [X86][SSE] There is no zmm addsubpd/addsubps instruction. Replace the assert in combineShuffleToAddSub with an early out. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256922 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cedadff364d..67f0725859c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -23684,9 +23684,13 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, /// the operands which explicitly discard the lanes which are unused by this /// operation to try to flow through the rest of the combiner the fact that /// they're unused. -static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) { +static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { SDLoc DL(N); EVT VT = N->getValueType(0); + if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && + (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64))) + return SDValue(); // We only handle target-independent shuffles. // FIXME: It would be easy and harmless to use the target shuffle mask @@ -23728,12 +23732,6 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) { isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15}))) return SDValue(); - // Only specific types are legal at this point, assert so we notice if and - // when these change. 
- assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v8f32 || - VT == MVT::v4f64) && - "Unknown vector type encountered!"); - return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS); } @@ -23753,8 +23751,8 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, // If we have legalized the vector types, look for blends of FADD and FSUB // nodes that we can fuse into an ADDSUB node. - if (TLI.isTypeLegal(VT) && Subtarget->hasSSE3()) - if (SDValue AddSub = combineShuffleToAddSub(N, DAG)) + if (TLI.isTypeLegal(VT)) + if (SDValue AddSub = combineShuffleToAddSub(N, Subtarget, DAG)) return AddSub; // Combine 256-bit vector shuffles. This is only profitable when in AVX mode diff --git a/test/CodeGen/X86/sse3-avx-addsub.ll b/test/CodeGen/X86/sse3-avx-addsub.ll index 8665edf8f1d..c4da546ed77 100644 --- a/test/CodeGen/X86/sse3-avx-addsub.ll +++ b/test/CodeGen/X86/sse3-avx-addsub.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 ; Test ADDSUB ISel patterns. 
@@ -101,6 +102,62 @@ define <2 x double> @test4(<2 x double> %A, <2 x double> %B) #0 { ret <2 x double> %vecinit2 } +define <16 x float> @test5(<16 x float> %A, <16 x float> %B) { +; SSE-LABEL: test5: +; SSE: # BB#0: +; SSE-NEXT: addsubps %xmm4, %xmm0 +; SSE-NEXT: addsubps %xmm5, %xmm1 +; SSE-NEXT: addsubps %xmm6, %xmm2 +; SSE-NEXT: addsubps %xmm7, %xmm3 +; SSE-NEXT: retq +; +; AVX1-LABEL: test5: +; AVX1: # BB#0: +; AVX1-NEXT: vaddsubps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vaddsubps %ymm3, %ymm1, %ymm1 +; AVX1-NEXT: retq +; +; AVX512-LABEL: test5: +; AVX512: # BB#0: +; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm2 +; AVX512-NEXT: vsubps %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,17,2,19,4,21,6,23,8,25,10,27,12,29,14,31] +; AVX512-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0 +; AVX512-NEXT: retq + %add = fadd <16 x float> %A, %B + %sub = fsub <16 x float> %A, %B + %vecinit2 = shufflevector <16 x float> %sub, <16 x float> %add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31> + ret <16 x float> %vecinit2 +} + +define <8 x double> @test6(<8 x double> %A, <8 x double> %B) { +; SSE-LABEL: test6: +; SSE: # BB#0: +; SSE-NEXT: addsubpd %xmm4, %xmm0 +; SSE-NEXT: addsubpd %xmm5, %xmm1 +; SSE-NEXT: addsubpd %xmm6, %xmm2 +; SSE-NEXT: addsubpd %xmm7, %xmm3 +; SSE-NEXT: retq +; +; AVX1-LABEL: test6: +; AVX1: # BB#0: +; AVX1-NEXT: vaddsubpd %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vaddsubpd %ymm3, %ymm1, %ymm1 +; AVX1-NEXT: retq +; +; AVX512-LABEL: test6: +; AVX512: # BB#0: +; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm2 +; AVX512-NEXT: vsubpd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,9,2,11,4,13,6,15] +; AVX512-NEXT: vpermt2pd %zmm2, %zmm1, %zmm0 +; AVX512-NEXT: retq + %add = fadd <8 x double> %A, %B + %sub = fsub <8 x double> %A, %B + %vecinit2 = shufflevector <8 x double> %sub, <8 x double> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> + ret <8 x double> %vecinit2 +} + define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) { ; SSE-LABEL: test1b: ; SSE: # BB#0: