From d63e0fc2d9086ed223bebc8fab8a8774fb43a1dd Mon Sep 17 00:00:00 2001
From: James Molloy <james.molloy@arm.com>
Date: Fri, 15 May 2015 16:15:57 +0000
Subject: [PATCH] Mark SMIN/SMAX/UMIN/UMAX nodes as legal and add patterns for
 them.

The new [SU]{MIN,MAX} SDNodes can be lowered directly to instructions for
most integer NEON datatypes - the exclusions being v1i64 and v2i64.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237455 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64ISelLowering.cpp |  6 ++
 lib/Target/AArch64/AArch64InstrInfo.td     | 49 +++++++++++
 test/CodeGen/AArch64/minmax.ll             | 96 ++++++++++++++++++++++
 3 files changed, 151 insertions(+)
 create mode 100644 test/CodeGen/AArch64/minmax.ll

diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4b93f0780e3..6251d4a5d26 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -679,6 +679,12 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
   setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
   setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
 
+  // [SU]{MIN,MAX} are legal for all integer NEON types apart from v1i64/v2i64.
+  if (!VT.isFloatingPoint() &&
+      VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
+    for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+      setOperationAction(Opcode, VT.getSimpleVT(), Legal);
+
   if (Subtarget->isLittleEndian()) {
     for (unsigned im = (unsigned)ISD::PRE_INC;
          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index e76e74cc82f..c7d6a69b9fd 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2809,6 +2809,55 @@ defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
                                   BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
 defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
 
+def : Pat<(v8i8 (smin V64:$Rn, V64:$Rm)),
+          (SMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (smin V64:$Rn, V64:$Rm)),
+          (SMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (smin V64:$Rn, V64:$Rm)),
+          (SMINv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (smin V128:$Rn, V128:$Rm)),
+          (SMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (smin V128:$Rn, V128:$Rm)),
+          (SMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (smin V128:$Rn, V128:$Rm)),
+          (SMINv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (smax V64:$Rn, V64:$Rm)),
+          (SMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (smax V64:$Rn, V64:$Rm)),
+          (SMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (smax V64:$Rn, V64:$Rm)),
+          (SMAXv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (smax V128:$Rn, V128:$Rm)),
+          (SMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (smax V128:$Rn, V128:$Rm)),
+          (SMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (smax V128:$Rn, V128:$Rm)),
+          (SMAXv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (umin V64:$Rn, V64:$Rm)),
+          (UMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (umin V64:$Rn, V64:$Rm)),
+          (UMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (umin V64:$Rn, V64:$Rm)),
+          (UMINv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (umin V128:$Rn, V128:$Rm)),
+          (UMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (umin V128:$Rn, V128:$Rm)),
+          (UMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (umin V128:$Rn, V128:$Rm)),
+          (UMINv4i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i8 (umax V64:$Rn, V64:$Rm)),
+          (UMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (umax V64:$Rn, V64:$Rm)),
+          (UMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (umax V64:$Rn, V64:$Rm)),
+          (UMAXv2i32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v16i8 (umax V128:$Rn, V128:$Rm)),
+          (UMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (umax V128:$Rn, V128:$Rm)),
+          (UMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (umax V128:$Rn, V128:$Rm)),
+          (UMAXv4i32 V128:$Rn, V128:$Rm)>;
+
 def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
           (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
 def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
diff --git a/test/CodeGen/AArch64/minmax.ll b/test/CodeGen/AArch64/minmax.ll
new file mode 100644
index 00000000000..a6b5adebe10
--- /dev/null
+++ b/test/CodeGen/AArch64/minmax.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+
+; CHECK-LABEL: t1:
+; CHECK: smax
+define <4 x i32> @t1(<4 x i32> %a, <4 x i32> %b) {
+  %t1 = icmp sgt <4 x i32> %a, %b
+  %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t2:
+; CHECK: smin
+define <4 x i32> @t2(<4 x i32> %a, <4 x i32> %b) {
+  %t1 = icmp slt <4 x i32> %a, %b
+  %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t3:
+; CHECK: umax
+define <4 x i32> @t3(<4 x i32> %a, <4 x i32> %b) {
+  %t1 = icmp ugt <4 x i32> %a, %b
+  %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t4:
+; CHECK: umin
+define <8 x i8> @t4(<8 x i8> %a, <8 x i8> %b) {
+  %t1 = icmp ult <8 x i8> %a, %b
+  %t2 = select <8 x i1> %t1, <8 x i8> %a, <8 x i8> %b
+  ret <8 x i8> %t2
+}
+
+; CHECK-LABEL: t5:
+; CHECK: smin
+define <4 x i16> @t5(<4 x i16> %a, <4 x i16> %b) {
+  %t1 = icmp sgt <4 x i16> %b, %a
+  %t2 = select <4 x i1> %t1, <4 x i16> %a, <4 x i16> %b
+  ret <4 x i16> %t2
+}
+
+; CHECK-LABEL: t6:
+; CHECK: smax
+define <2 x i32> @t6(<2 x i32> %a, <2 x i32> %b) {
+  %t1 = icmp slt <2 x i32> %b, %a
+  %t2 = select <2 x i1> %t1, <2 x i32> %a, <2 x i32> %b
+  ret <2 x i32> %t2
+}
+
+; CHECK-LABEL: t7:
+; CHECK: umin
+define <16 x i8> @t7(<16 x i8> %a, <16 x i8> %b) {
+  %t1 = icmp ugt <16 x i8> %b, %a
+  %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %t2
+}
+
+; CHECK-LABEL: t8:
+; CHECK: umax
+define <8 x i16> @t8(<8 x i16> %a, <8 x i16> %b) {
+  %t1 = icmp ult <8 x i16> %b, %a
+  %t2 = select <8 x i1> %t1, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %t2
+}
+
+; CHECK-LABEL: t9:
+; CHECK: umin
+; CHECK: smax
+define <4 x i32> @t9(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+  %t1 = icmp ugt <4 x i32> %b, %a
+  %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+  %t3 = icmp sge <4 x i32> %t2, %c
+  %t4 = select <4 x i1> %t3, <4 x i32> %t2, <4 x i32> %c
+  ret <4 x i32> %t4
+}
+
+; CHECK-LABEL: t10:
+; CHECK: smax
+; CHECK: smax
+define <8 x i32> @t10(<8 x i32> %a, <8 x i32> %b) {
+  %t1 = icmp sgt <8 x i32> %a, %b
+  %t2 = select <8 x i1> %t1, <8 x i32> %a, <8 x i32> %b
+  ret <8 x i32> %t2
+}
+
+; CHECK-LABEL: t11:
+; CHECK: smin
+; CHECK: smin
+; CHECK: smin
+; CHECK: smin
+define <16 x i32> @t11(<16 x i32> %a, <16 x i32> %b) {
+  %t1 = icmp sle <16 x i32> %a, %b
+  %t2 = select <16 x i1> %t1, <16 x i32> %a, <16 x i32> %b
+  ret <16 x i32> %t2
+}
-- 
2.34.1