From 74dad551d839814cf8f6d8a8e81f2e365837d2b8 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Fri, 22 Jul 2011 00:15:00 +0000 Subject: [PATCH] Add a DAGCombine for transforming 128->256 casts into a simple vxorps + vinsertf128 pair of instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@135727 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 68 +++++++++++++++++++++++++++--- test/CodeGen/X86/avx-cast.ll | 26 ++++++++++++ 2 files changed, 87 insertions(+), 7 deletions(-) create mode 100644 test/CodeGen/X86/avx-cast.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 85c6f492351..55d568092c0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11232,23 +11232,77 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N, return TargetLowering::isGAPlusOffset(N, GA, Offset); } -/// PerformShuffleCombine - Combine a vector_shuffle that is equal to -/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load -/// if the load addresses are consecutive, non-overlapping, and in the right -/// order. +/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors. +static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + DebugLoc dl = N->getDebugLoc(); + ShuffleVectorSDNode *SVOp = cast(N); + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + EVT VT = SVOp->getValueType(0); + + if (V1.getOpcode() == ISD::CONCAT_VECTORS && + V2.getOpcode() == ISD::CONCAT_VECTORS) { + // + // 0,0,0,... + // \ + // V UNDEF BUILD_VECTOR UNDEF + // \ / \ / + // CONCAT_VECTOR CONCAT_VECTOR + // \ / + // \ / + // RESULT: V + zero extended + // + if (V2.getOperand(0).getOpcode() != ISD::BUILD_VECTOR || + V2.getOperand(1).getOpcode() != ISD::UNDEF || + V1.getOperand(1).getOpcode() != ISD::UNDEF) + return SDValue(); + + if (!ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode())) + return SDValue(); + + // To match the shuffle mask, the first half of the mask should + // be exactly the first vector, and all the rest a splat with the + // first element of the second one. + int NumElems = VT.getVectorNumElements(); + for (int i = 0; i < NumElems/2; ++i) + if (!isUndefOrEqual(SVOp->getMaskElt(i), i) || + !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems)) + return SDValue(); + + // Emit a zeroed vector and insert the desired subvector on its + // first half. + SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */, DAG, dl); + SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0), + DAG.getConstant(0, MVT::i32), DAG, dl); + return DCI.CombineTo(N, InsV); + } + + return SDValue(); +} + +/// PerformShuffleCombine - Performs several different shuffle combines. static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); - if (VT.getSizeInBits() != 128) - return SDValue(); - // Don't create instructions with illegal types after legalize types has run. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType())) return SDValue(); + // Only handle pure VECTOR_SHUFFLE nodes. + if (VT.getSizeInBits() == 256 && N->getOpcode() == ISD::VECTOR_SHUFFLE) + return PerformShuffleCombine256(N, DAG, DCI); + + // Only handle 128 wide vector from here on. + if (VT.getSizeInBits() != 128) + return SDValue(); + + // Combine a vector_shuffle that is equal to build_vector load1, load2, load3, + // load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are + // consecutive, non-overlapping, and in the right order. SmallVector Elts; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) Elts.push_back(getShuffleScalarElt(N, i, DAG, 0)); diff --git a/test/CodeGen/X86/avx-cast.ll b/test/CodeGen/X86/avx-cast.ll new file mode 100644 index 00000000000..bb742f8d97f --- /dev/null +++ b/test/CodeGen/X86/avx-cast.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vxorps +; CHECK-NEXT: vinsertf128 $0 +define <8 x float> @castA(<4 x float> %m) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <4 x float> %m, <4 x float> zeroinitializer, <8 x i32> + ret <8 x float> %shuffle.i +} + +; CHECK: vxorps +; CHECK-NEXT: vinsertf128 $0 +define <4 x double> @castB(<2 x double> %m) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <2 x double> %m, <2 x double> zeroinitializer, <4 x i32> + ret <4 x double> %shuffle.i +} + +; CHECK: vxorps +; CHECK-NEXT: vinsertf128 $0 +define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <2 x i64> %m, <2 x i64> zeroinitializer, <4 x i32> + ret <4 x i64> %shuffle.i +} + -- 2.34.1