From 106b79744b185969faf8a74c6bd7cad35e6f11bd Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Tue, 28 Jan 2014 18:14:21 +0000 Subject: [PATCH] [X86] Add extra rules for combining vselect dag nodes into movsd. This improves the fix committed at revision 199683 adding the following new target specific combine rules: 1) fold (v4i32: vselect <0,0,-1,-1>, A, B) -> (v4i32 (bitcast (movsd (v2i64 (bitcast A)), (v2i64 (bitcast B))) )) 2) fold (v4f32: vselect <0,0,-1,-1>, A, B) -> (v4f32 (bitcast (movsd (v2f64 (bitcast A)), (v2f64 (bitcast B))) )) 3) fold (v4i32: vselect <-1,-1,0,0>, A, B) -> (v4i32 (bitcast (movsd (v2i64 (bitcast B)), (v2i64 (bitcast A))) )) 4) fold (v4f32: vselect <-1,-1,0,0>, A, B) -> (v4f32 (bitcast (movsd (v2f64 (bitcast B)), (v2f64 (bitcast A))) )) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200324 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 40 ++++++++++++++++++++++++++++++ test/CodeGen/X86/sse41-blend.ll | 4 +-- test/CodeGen/X86/vselect-2.ll | 33 ++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/X86/vselect-2.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f3ec8f9ac38..e292cab0262 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17324,6 +17324,46 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, return getTargetShuffleNode(X86ISD::MOVSS, DL, VT, A, B, DAG); return getTargetShuffleNode(X86ISD::MOVSD, DL, VT, A, B, DAG); } + + if (Subtarget->hasSSE2() && (VT == MVT::v4i32 || VT == MVT::v4f32)) { + // fold (v4i32: vselect <0,0,-1,-1>, A, B) -> + // (v4i32 (bitcast (movsd (v2i64 (bitcast A)), + // (v2i64 (bitcast B))))) + // + // fold (v4f32: vselect <0,0,-1,-1>, A, B) -> + // (v4f32 (bitcast (movsd (v2f64 (bitcast A)), + // (v2f64 (bitcast B))))) + // + // fold (v4i32: vselect <-1,-1,0,0>, A, B) -> + // (v4i32 (bitcast (movsd (v2i64
(bitcast B)), + // (v2i64 (bitcast A))))) + // + // fold (v4f32: vselect <-1,-1,0,0>, A, B) -> + // (v4f32 (bitcast (movsd (v2f64 (bitcast B)), + // (v2f64 (bitcast A))))) + + CanFold = (isZero(Cond.getOperand(0)) && + isZero(Cond.getOperand(1)) && + isAllOnes(Cond.getOperand(2)) && + isAllOnes(Cond.getOperand(3))); + + if (!CanFold && isAllOnes(Cond.getOperand(0)) && + isAllOnes(Cond.getOperand(1)) && + isZero(Cond.getOperand(2)) && + isZero(Cond.getOperand(3))) { + CanFold = true; + std::swap(LHS, RHS); + } + + if (CanFold) { + EVT NVT = (VT == MVT::v4i32) ? MVT::v2i64 : MVT::v2f64; + SDValue NewA = DAG.getNode(ISD::BITCAST, DL, NVT, LHS); + SDValue NewB = DAG.getNode(ISD::BITCAST, DL, NVT, RHS); + SDValue Select = getTargetShuffleNode(X86ISD::MOVSD, DL, NVT, NewA, + NewB, DAG); + return DAG.getNode(ISD::BITCAST, DL, VT, Select); + } + } } } diff --git a/test/CodeGen/X86/sse41-blend.ll b/test/CodeGen/X86/sse41-blend.ll index 597852c3690..4681fde7548 100644 --- a/test/CodeGen/X86/sse41-blend.ll +++ b/test/CodeGen/X86/sse41-blend.ll @@ -13,7 +13,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { ;CHECK: blendvps ;CHECK: ret define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) { - %vsel = select <4 x i1> , <4 x i8> %v1, <4 x i8> %v2 + %vsel = select <4 x i1> , <4 x i8> %v1, <4 x i8> %v2 ret <4 x i8> %vsel } @@ -30,7 +30,7 @@ define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) { ;CHECK: blendvps ;CHECK: ret define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) { - %vsel = select <4 x i1> , <4 x i32> %v1, <4 x i32> %v2 + %vsel = select <4 x i1> , <4 x i32> %v1, <4 x i32> %v2 ret <4 x i32> %vsel } diff --git a/test/CodeGen/X86/vselect-2.ll b/test/CodeGen/X86/vselect-2.ll new file mode 100644 index 00000000000..50da32c67a3 --- /dev/null +++ b/test/CodeGen/X86/vselect-2.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=sse2 | FileCheck %s + +define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) { + 
%select = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> %A, <4 x i32> %B + ret <4 x i32> %select +} +; CHECK-LABEL: test1 +; CHECK: movsd +; CHECK: ret + +define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) { + %select = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> %A, <4 x i32> %B + ret <4 x i32> %select +} +; CHECK-LABEL: test2 +; CHECK: movsd +; CHECK-NEXT: ret + +define <4 x float> @test3(<4 x float> %A, <4 x float> %B) { + %select = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x float> %A, <4 x float> %B + ret <4 x float> %select +} +; CHECK-LABEL: test3 +; CHECK: movsd +; CHECK: ret + +define <4 x float> @test4(<4 x float> %A, <4 x float> %B) { + %select = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %A, <4 x float> %B + ret <4 x float> %select +} +; CHECK-LABEL: test4 +; CHECK: movsd +; CHECK-NEXT: ret -- 2.34.1