From b21635658f4b9b7c7e28ff035e731903545646f0 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Sun, 13 Sep 2015 08:15:15 +0000 Subject: [PATCH] AVX-512: Fixed a bug in OR/XOR operations for 512-bit FP values on KNL. KNL does not have VXORPS, VORPS for 512-bit values. I use integer VPXOR, VPOR that actually do the same. X86ISD::FXOR/FOR are generated as a result of FSUB combining. Differential Revision: http://reviews.llvm.org/D12753 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247523 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 18 ++++++++++++++++-- test/CodeGen/X86/avx512-arith.ll | 10 ++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4ac3b0f7ae5..ee7c445d0b1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -25152,7 +25152,8 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG, } /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes. -static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR); // F[X]OR(0.0, x) -> x @@ -25164,6 +25165,19 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) { if (ConstantFPSDNode *C = dyn_cast(N->getOperand(1))) if (C->getValueAPF().isPosZero()) return N->getOperand(0); + + EVT VT = N->getValueType(0); + if (VT.is512BitVector() && !Subtarget->hasDQI()) { + SDLoc dl(N); + MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits()); + MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements()); + + SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0)); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1)); + unsigned IntOpcode = (N->getOpcode() == X86ISD::FOR) ? ISD::OR : ISD::XOR; + SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); + return DAG.getNode(ISD::BITCAST, dl, VT, IntOp); + } return SDValue(); } @@ -26027,7 +26041,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget); case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget); case X86ISD::FXOR: - case X86ISD::FOR: return PerformFORCombine(N, DAG); + case X86ISD::FOR: return PerformFORCombine(N, DAG, Subtarget); case X86ISD::FMIN: case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG); case X86ISD::FAND: return PerformFANDCombine(N, DAG); diff --git a/test/CodeGen/X86/avx512-arith.ll b/test/CodeGen/X86/avx512-arith.ll index 1ecd1007905..522abd26147 100644 --- a/test/CodeGen/X86/avx512-arith.ll +++ b/test/CodeGen/X86/avx512-arith.ll @@ -652,3 +652,13 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j, %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer ret <8 x double> %r } + +; CHECK-LABEL: test_fxor +; CHECK: vpxord +; CHECK: ret +define <16 x float> @test_fxor(<16 x float> %a) { + + %res = fsub <16 x float> , %a + ret <16 x float>%res +} + -- 2.34.1