[AArch64] Match fminnum/fmaxnum for vector fminnm/fmaxnm instead of an intrinsic.

author James Molloy <james.molloy@arm.com>

Tue, 11 Aug 2015 12:06:37 +0000 (12:06 +0000)

committer James Molloy <james.molloy@arm.com>

Tue, 11 Aug 2015 12:06:37 +0000 (12:06 +0000)
author James Molloy <james.molloy@arm.com>
Tue, 11 Aug 2015 12:06:37 +0000 (12:06 +0000)
committer James Molloy <james.molloy@arm.com>
Tue, 11 Aug 2015 12:06:37 +0000 (12:06 +0000)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 42d8fe5c6fa357c9bf050bf615e285cf8b26bcb2..7bc56caa49cbabe7ae01f4f0eeefad004e7446ad 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -389,6 +389,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
      setOperationAction(ISD::FRINT, Ty, Legal);
      setOperationAction(ISD::FTRUNC, Ty, Legal);
      setOperationAction(ISD::FROUND, Ty, Legal);
+    setOperationAction(ISD::FMINNUM, Ty, Legal);
+    setOperationAction(ISD::FMAXNUM, Ty, Legal);
    }
  
    setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
@@ -679,9 +681,10 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
                              ISD::SABSDIFF, ISD::UABSDIFF})
        setOperationAction(Opcode, VT.getSimpleVT(), Legal);
  
-  // F[MIN|MAX]NAN are available for all FP NEON types.
+  // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
    if (VT.isFloatingPoint())
-    for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN})
+    for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN,
+                            ISD::FMINNUM, ISD::FMAXNUM})
        setOperationAction(Opcode, VT.getSimpleVT(), Legal);
  
    if (Subtarget->isLittleEndian()) {
@@ -8233,6 +8236,12 @@ static SDValue performIntrinsicCombine(SDNode *N,
    case Intrinsic::aarch64_neon_uabd:
      return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
                         N->getOperand(1), N->getOperand(2));
+  case Intrinsic::aarch64_neon_fmaxnm:
+    return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2));
+  case Intrinsic::aarch64_neon_fminnm:
+    return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2));
    case Intrinsic::aarch64_neon_smull:
    case Intrinsic::aarch64_neon_umull:
    case Intrinsic::aarch64_neon_pmull:
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td

index f54328ce790202fb5dbb1b584b43a4355fd04b10..349431eec9b377fc8c2dbd82f5305eef02c72c60 100644 (file)
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2502,9 +2502,9 @@ defm FADD   : TwoOperandFPData<0b0010, "fadd", fadd>;
  let SchedRW = [WriteFDiv] in {
  defm FDIV   : TwoOperandFPData<0b0001, "fdiv", fdiv>;
  }
-defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_aarch64_neon_fmaxnm>;
+defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>;
  defm FMAX   : TwoOperandFPData<0b0100, "fmax", fmaxnan>;
-defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_aarch64_neon_fminnm>;
+defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>;
  defm FMIN   : TwoOperandFPData<0b0101, "fmin", fminnan>;
  let SchedRW = [WriteFMul] in {
  defm FMUL   : TwoOperandFPData<0b0000, "fmul", fmul>;
@@ -2516,9 +2516,9 @@ def : Pat<(v1f64 (fmaxnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
            (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
  def : Pat<(v1f64 (fminnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
            (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (int_aarch64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
            (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (int_aarch64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
            (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
  
  //===----------------------------------------------------------------------===//
@@ -2804,11 +2804,11 @@ defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>;
  defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>;
  defm FDIV    : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>;
  defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
-defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_aarch64_neon_fmaxnm>;
+defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", fmaxnum>;
  defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>;
  defm FMAX    : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", fmaxnan>;
  defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>;
-defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_aarch64_neon_fminnm>;
+defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", fminnum>;
  defm FMINP   : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>;
  defm FMIN    : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", fminnan>;
  
diff --git a/test/CodeGen/AArch64/arm64-vminmaxnm.ll b/test/CodeGen/AArch64/arm64-vminmaxnm.ll

index b5aca45cd479a0a5e49b6e9ce82a76ce821d46f3..302ba9d681c640f4414e76bc9d063b5655a2f559 100644 (file)
--- a/test/CodeGen/AArch64/arm64-vminmaxnm.ll
+++ b/test/CodeGen/AArch64/arm64-vminmaxnm.ll
@@ -42,13 +42,28 @@ define <2 x double> @f6(<2 x double> %a, <2 x double> %b) nounwind readnone ssp
    ret <2 x double> %vminnm2.i
  }
  
+define float @f7(float %a, float %b) nounwind readnone ssp { ; scalar f32 fmaxnm intrinsic; the CHECK below expects the s-register fmaxnm
+; CHECK: fmaxnm        s0, s0, s1
+; CHECK: ret
+  %vmaxnm2.i = tail call float @llvm.aarch64.neon.fmaxnm.f32(float %a, float %b) nounwind
+  ret float %vmaxnm2.i
+}
+
+define double @f8(double %a, double %b) nounwind readnone ssp { ; scalar f64 fminnm intrinsic; the CHECK below expects the d-register fminnm
+; CHECK: fminnm        d0, d0, d1
+; CHECK: ret
+  %vminnm2.i = tail call double @llvm.aarch64.neon.fminnm.f64(double %a, double %b) nounwind
+  ret double %vminnm2.i
+}
+
  declare <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double>, <2 x double>) nounwind readnone
  declare <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
  declare <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
  declare <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double>, <2 x double>) nounwind readnone
  declare <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
  declare <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
-
+declare float @llvm.aarch64.neon.fmaxnm.f32(float, float) nounwind readnone
+declare double @llvm.aarch64.neon.fminnm.f64(double, double) nounwind readnone
  
  define double @test_fmaxnmv(<2 x double> %in) {
  ; CHECK-LABEL: test_fmaxnmv:
diff --git a/test/CodeGen/AArch64/f16-instructions.ll b/test/CodeGen/AArch64/f16-instructions.ll

index 234447c07893aab02c46754751a40f086e687c80..0cadfc8c44b336247828b4208b248e98cc972131 100644 (file)
--- a/test/CodeGen/AArch64/f16-instructions.ll
+++ b/test/CodeGen/AArch64/f16-instructions.ll
@@ -644,13 +644,10 @@ define half @test_fabs(half %a) #0 {
  }
  
  ; CHECK-LABEL: test_minnum:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-NEXT: mov  x29, sp
-; CHECK-NEXT: fcvt s0, h0
  ; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: bl {{_?}}fminf
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fminnm s0, s0, s1
  ; CHECK-NEXT: fcvt h0, s0
-; CHECK-NEXT: ldp x29, x30, [sp], #16
  ; CHECK-NEXT: ret
  define half @test_minnum(half %a, half %b) #0 {
    %r = call half @llvm.minnum.f16(half %a, half %b)
@@ -658,13 +655,10 @@ define half @test_minnum(half %a, half %b) #0 {
  }
  
  ; CHECK-LABEL: test_maxnum:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-NEXT: mov  x29, sp
-; CHECK-NEXT: fcvt s0, h0
  ; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: bl {{_?}}fmaxf
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fmaxnm s0, s0, s1
  ; CHECK-NEXT: fcvt h0, s0
-; CHECK-NEXT: ldp x29, x30, [sp], #16
  ; CHECK-NEXT: ret
  define half @test_maxnum(half %a, half %b) #0 {
    %r = call half @llvm.maxnum.f16(half %a, half %b)
author	James Molloy <james.molloy@arm.com>
	Tue, 11 Aug 2015 12:06:37 +0000 (12:06 +0000)
committer	James Molloy <james.molloy@arm.com>
	Tue, 11 Aug 2015 12:06:37 +0000 (12:06 +0000)
lib/Target/AArch64/AArch64ISelLowering.cpp		patch | blob | history
lib/Target/AArch64/AArch64InstrInfo.td		patch | blob | history
test/CodeGen/AArch64/arm64-vminmaxnm.ll		patch | blob | history
test/CodeGen/AArch64/f16-instructions.ll		patch | blob | history