From 92ee945e2e783e49161a773af0a4f99dc6147423 Mon Sep 17 00:00:00 2001 From: Chandler Carruth <chandlerc@gmail.com> Date: Fri, 15 Aug 2014 17:42:00 +0000 Subject: [PATCH] [x86] Teach the new AVX v4f64 shuffle lowering to use UNPCK instructions where applicable for blending. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215737 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 42 +++++++++++++++++++++++ test/CodeGen/X86/vector-shuffle-256-v4.ll | 26 +++++++++++++- 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 327cc296fe6..b31aa446998 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7055,6 +7055,35 @@ static bool isSingleInputShuffleMask(ArrayRef<int> Mask) { return true; } +/// \brief Implementation of the \c isShuffleEquivalent variadic functor. +/// +/// See its documentation for details. +static bool isShuffleEquivalentImpl(ArrayRef<int> Mask, + ArrayRef<const int *> Args) { + if (Mask.size() != Args.size()) + return false; + for (int i = 0, e = Mask.size(); i < e; ++i) { + assert(*Args[i] >= 0 && "Arguments must be positive integers!"); + assert(*Args[i] < (int)Args.size() * 2 && + "Argument outside the range of possible shuffle inputs!"); + if (Mask[i] != -1 && Mask[i] != *Args[i]) + return false; + } + return true; +} +/// \brief Checks whether a shuffle mask is equivalent to an explicit list of +/// arguments. +/// +/// This is a fast way to test a shuffle mask against a fixed pattern: +/// +/// if (isShuffleEquivalent(Mask, 3, 2, 1, 0)) { ... } +/// +/// It returns true if the mask is exactly as wide as the argument list, and +/// each element of the mask is either -1 (signifying undef) or the value given +/// in the argument. +static const VariadicFunction1< + bool, ArrayRef<int>, int, isShuffleEquivalentImpl> isShuffleEquivalent = {}; + /// \brief Get a 4-lane 8-bit shuffle immediate for a mask. 
/// /// This helper function produces an 8-bit shuffle immediate corresponding to @@ -8440,6 +8469,19 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, DAG.getConstant(VPERMILPMask, MVT::i8)); } + // X86 has dedicated unpack instructions that can handle specific blend + // operations: UNPCKH and UNPCKL. + if (isShuffleEquivalent(Mask, 0, 4, 2, 6)) + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2); + if (isShuffleEquivalent(Mask, 1, 5, 3, 7)) + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2); + // FIXME: It would be nice to find a way to get canonicalization to commute + // these patterns. + if (isShuffleEquivalent(Mask, 4, 0, 6, 2)) + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V2, V1); + if (isShuffleEquivalent(Mask, 5, 1, 7, 3)) + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1); + // Check if the blend happens to exactly fit that of SHUFPD. if (Mask[0] < 4 && (Mask[1] == -1 || Mask[1] >= 4) && Mask[2] < 4 && (Mask[3] == -1 || Mask[3] >= 4)) { diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll index b7047724e40..7051888302e 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -239,11 +239,35 @@ define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) { define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) { ; AVX1-LABEL: @shuffle_v4f64_0426 ; AVX1: # BB#0: -; AVX1-NEXT: vshufpd {{.*}} # ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; AVX1-NEXT: vunpcklpd {{.*}} # ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; AVX1-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> ret <4 x double> %shuffle } +define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) { +; AVX1-LABEL: @shuffle_v4f64_1537 +; AVX1: # BB#0: +; AVX1-NEXT: vunpckhpd {{.*}} # ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; AVX1-NEXT: retq + %shuffle = shufflevector <4 x 
double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + ret <4 x double> %shuffle +} +define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) { +; AVX1-LABEL: @shuffle_v4f64_4062 +; AVX1: # BB#0: +; AVX1-NEXT: vunpcklpd {{.*}} # ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] +; AVX1-NEXT: retq + %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2> + ret <4 x double> %shuffle +} +define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) { +; AVX1-LABEL: @shuffle_v4f64_5173 +; AVX1: # BB#0: +; AVX1-NEXT: vunpckhpd {{.*}} # ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3] +; AVX1-NEXT: retq + %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3> + ret <4 x double> %shuffle +} define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) { ; AVX1-LABEL: @shuffle_v4f64_5163 ; AVX1: # BB#0: -- 2.34.1