setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
+ setOperationAction(ISD::FABS, MVT::v16f32, Custom);
setOperationAction(ISD::FADD, MVT::v8f64, Legal);
setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
+ setOperationAction(ISD::FABS, MVT::v8f64, Custom);
setOperationAction(ISD::FMA, MVT::v8f64, Legal);
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
return SDValue();
}
+static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (VT.is512BitVector() && !Subtarget->hasDQI()) {
+ // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extension.
+ // These logic operations may be executed in the integer domain.
+ SDLoc dl(N);
+ MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits());
+ MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements());
+
+ SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1));
+ unsigned IntOpcode = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected FP logic op");
+ case X86ISD::FOR: IntOpcode = ISD::OR; break;
+ case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
+ case X86ISD::FAND: IntOpcode = ISD::AND; break;
+ case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
+ }
+ SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
+ return DAG.getNode(ISD::BITCAST, dl, VT, IntOp);
+ }
+ return SDValue();
+}
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
if (C->getValueAPF().isPosZero())
return N->getOperand(0);
- EVT VT = N->getValueType(0);
- if (VT.is512BitVector() && !Subtarget->hasDQI()) {
- SDLoc dl(N);
- MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits());
- MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements());
-
- SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1));
- unsigned IntOpcode = (N->getOpcode() == X86ISD::FOR) ? ISD::OR : ISD::XOR;
- SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
- return DAG.getNode(ISD::BITCAST, dl, VT, IntOp);
- }
- return SDValue();
+ return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.
}
/// Do target-specific dag combines on X86ISD::FAND nodes.
-static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
// FAND(0.0, x) -> 0.0
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
- return SDValue();
+ return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FANDN nodes
-static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
// FANDN(0.0, x) -> x
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
- return SDValue();
+ return lowerX86FPLogicOp(N, DAG, Subtarget);
}
static SDValue PerformBTCombine(SDNode *N,
case X86ISD::FOR: return PerformFORCombine(N, DAG, Subtarget);
case X86ISD::FMIN:
case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
- case X86ISD::FAND: return PerformFANDCombine(N, DAG);
- case X86ISD::FANDN: return PerformFANDNCombine(N, DAG);
+ case X86ISD::FAND: return PerformFANDCombine(N, DAG, Subtarget);
+ case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ANY_EXTEND:
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
ret <16 x float>%res
}
+define <8 x float> @test_fxor_8f32(<8 x float> %a) {
+; CHECK-LABEL: test_fxor_8f32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
+ ret <8 x float>%res
+}
+
+define <8 x double> @fabs_v8f64(<8 x double> %p)
+; AVX512F-LABEL: fabs_v8f64:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fabs_v8f64:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: fabs_v8f64:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: fabs_v8f64:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; SKX-LABEL: fabs_v8f64:
+; SKX: ## BB#0:
+; SKX-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
+; SKX-NEXT: retq
+{
+ %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
+ ret <8 x double> %t
+}
+declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
+
+define <16 x float> @fabs_v16f32(<16 x float> %p)
+; AVX512F-LABEL: fabs_v16f32:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fabs_v16f32:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: fabs_v16f32:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: fabs_v16f32:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; SKX-LABEL: fabs_v16f32:
+; SKX: ## BB#0:
+; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
+; SKX-NEXT: retq
+{
+ %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
+ ret <16 x float> %t
+}
+declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)