From 2fdec16fbe545c2ba80ceac3ce62cf0277822fd9 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 15 Sep 2014 11:26:25 +0000 Subject: [PATCH] [x86] Teach the x86 DAG combiner to form UNPCKLPS and UNPCKHPS instructions from the relevant shuffle patterns. This is the last tweak I'm aware of to generate essentially perfect v4f32 and v2f64 shuffles with the new vector shuffle lowering up through SSE4.1. I'm sure I've missed some and it'd be nice to check since v4f32 is amenable to exhaustive exploration, but these are all of the tricks I'm aware of. With AVX there is a new trick to use the VPERMILPS instruction; that's coming up in a subsequent patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217761 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 14 ++++++++++++++ test/CodeGen/X86/vector-shuffle-128-v4.ll | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 04f1fafa2e7..da3ec8b35eb 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -19413,6 +19413,20 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask, /*AddTo*/ true); return true; } + if (Mask.equals(0, 0, 1, 1) || Mask.equals(2, 2, 3, 3)) { + bool Lo = Mask.equals(0, 0, 1, 1); + unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH; + MVT ShuffleVT = MVT::v4f32; + if (Depth == 1 && Root->getOpcode() == Shuffle) + return false; // Nothing to do!
+ Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input); + DCI.AddToWorklist(Op.getNode()); + Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op); + DCI.AddToWorklist(Op.getNode()); + DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op), + /*AddTo*/ true); + return true; + } } // We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll index dcd8ab166cf..601db25748a 100644 --- a/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -119,6 +119,20 @@ define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) { %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ret <4 x float> %shuffle } +define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) { +; ALL-LABEL: @shuffle_v4f32_0011 +; ALL: unpcklps {{.*}} # xmm0 = xmm0[0,0,1,1] +; ALL-NEXT: retq + %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1> + ret <4 x float> %shuffle +} +define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) { +; ALL-LABEL: @shuffle_v4f32_2233 +; ALL: unpckhps {{.*}} # xmm0 = xmm0[2,2,3,3] +; ALL-NEXT: retq + %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3> + ret <4 x float> %shuffle +} define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) { ; SSE2-LABEL: @shuffle_v4f32_0022 ; SSE2: shufps {{.*}} # xmm0 = xmm0[0,0,2,2] -- 2.34.1