From d63e0fc2d9086ed223bebc8fab8a8774fb43a1dd Mon Sep 17 00:00:00 2001 From: James Molloy Date: Fri, 15 May 2015 16:15:57 +0000 Subject: [PATCH] Mark SMIN/SMAX/UMIN/UMAX nodes as legal and add patterns for them. The new [SU]{MIN,MAX} SDNodes can be lowered directly to instructions for most NEON datatypes - the big exclusion being v2i64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237455 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 6 ++ lib/Target/AArch64/AArch64InstrInfo.td | 49 +++++++++++ test/CodeGen/AArch64/minmax.ll | 96 ++++++++++++++++++++++ 3 files changed, 151 insertions(+) create mode 100644 test/CodeGen/AArch64/minmax.ll diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 4b93f0780e3..6251d4a5d26 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -679,6 +679,12 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom); setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom); + // [SU]{MIN,MAX} are available for all integer NEON types except v1i64/v2i64. 
+ if (!VT.isFloatingPoint() && + VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64) + for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) + setOperationAction(Opcode, VT.getSimpleVT(), Legal); + if (Subtarget->isLittleEndian()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index e76e74cc82f..c7d6a69b9fd 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -2809,6 +2809,55 @@ defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn", BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >; defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>; +def : Pat<(v8i8 (smin V64:$Rn, V64:$Rm)), + (SMINv8i8 V64:$Rn, V64:$Rm)>; +def : Pat<(v4i16 (smin V64:$Rn, V64:$Rm)), + (SMINv4i16 V64:$Rn, V64:$Rm)>; +def : Pat<(v2i32 (smin V64:$Rn, V64:$Rm)), + (SMINv2i32 V64:$Rn, V64:$Rm)>; +def : Pat<(v16i8 (smin V128:$Rn, V128:$Rm)), + (SMINv16i8 V128:$Rn, V128:$Rm)>; +def : Pat<(v8i16 (smin V128:$Rn, V128:$Rm)), + (SMINv8i16 V128:$Rn, V128:$Rm)>; +def : Pat<(v4i32 (smin V128:$Rn, V128:$Rm)), + (SMINv4i32 V128:$Rn, V128:$Rm)>; +def : Pat<(v8i8 (smax V64:$Rn, V64:$Rm)), + (SMAXv8i8 V64:$Rn, V64:$Rm)>; +def : Pat<(v4i16 (smax V64:$Rn, V64:$Rm)), + (SMAXv4i16 V64:$Rn, V64:$Rm)>; +def : Pat<(v2i32 (smax V64:$Rn, V64:$Rm)), + (SMAXv2i32 V64:$Rn, V64:$Rm)>; +def : Pat<(v16i8 (smax V128:$Rn, V128:$Rm)), + (SMAXv16i8 V128:$Rn, V128:$Rm)>; +def : Pat<(v8i16 (smax V128:$Rn, V128:$Rm)), + (SMAXv8i16 V128:$Rn, V128:$Rm)>; +def : Pat<(v4i32 (smax V128:$Rn, V128:$Rm)), + (SMAXv4i32 V128:$Rn, V128:$Rm)>; +def : Pat<(v8i8 (umin V64:$Rn, V64:$Rm)), + (UMINv8i8 V64:$Rn, V64:$Rm)>; +def : Pat<(v4i16 (umin V64:$Rn, V64:$Rm)), + (UMINv4i16 V64:$Rn, V64:$Rm)>; +def : Pat<(v2i32 (umin V64:$Rn, V64:$Rm)), + (UMINv2i32 V64:$Rn, V64:$Rm)>; +def : Pat<(v16i8 (umin V128:$Rn, V128:$Rm)), + (UMINv16i8 V128:$Rn, 
V128:$Rm)>; +def : Pat<(v8i16 (umin V128:$Rn, V128:$Rm)), + (UMINv8i16 V128:$Rn, V128:$Rm)>; +def : Pat<(v4i32 (umin V128:$Rn, V128:$Rm)), + (UMINv4i32 V128:$Rn, V128:$Rm)>; +def : Pat<(v8i8 (umax V64:$Rn, V64:$Rm)), + (UMAXv8i8 V64:$Rn, V64:$Rm)>; +def : Pat<(v4i16 (umax V64:$Rn, V64:$Rm)), + (UMAXv4i16 V64:$Rn, V64:$Rm)>; +def : Pat<(v2i32 (umax V64:$Rn, V64:$Rm)), + (UMAXv2i32 V64:$Rn, V64:$Rm)>; +def : Pat<(v16i8 (umax V128:$Rn, V128:$Rm)), + (UMAXv16i8 V128:$Rn, V128:$Rm)>; +def : Pat<(v8i16 (umax V128:$Rn, V128:$Rm)), + (UMAXv8i16 V128:$Rn, V128:$Rm)>; +def : Pat<(v4i32 (umax V128:$Rn, V128:$Rm)), + (UMAXv4i32 V128:$Rn, V128:$Rm)>; + def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm), diff --git a/test/CodeGen/AArch64/minmax.ll b/test/CodeGen/AArch64/minmax.ll new file mode 100644 index 00000000000..a6b5adebe10 --- /dev/null +++ b/test/CodeGen/AArch64/minmax.ll @@ -0,0 +1,96 @@ +; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s + +; CHECK-LABEL: t1 +; CHECK: smax +define <4 x i32> @t1(<4 x i32> %a, <4 x i32> %b) { + %t1 = icmp sgt <4 x i32> %a, %b + %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %t2 +} + +; CHECK-LABEL: t2 +; CHECK: smin +define <4 x i32> @t2(<4 x i32> %a, <4 x i32> %b) { + %t1 = icmp slt <4 x i32> %a, %b + %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %t2 +} + +; CHECK-LABEL: t3 +; CHECK: umax +define <4 x i32> @t3(<4 x i32> %a, <4 x i32> %b) { + %t1 = icmp ugt <4 x i32> %a, %b + %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %t2 +} + +; CHECK-LABEL: t4 +; CHECK: umin +define <8 x i8> @t4(<8 x i8> %a, <8 x i8> %b) { + %t1 = icmp ult <8 x i8> %a, %b + %t2 = select <8 x i1> %t1, <8 x i8> %a, <8 x i8> %b + ret <8 x i8> %t2 +} + +; CHECK-LABEL: t5 +; CHECK: smin +define <4 x i16> @t5(<4 x i16> %a, <4 x i16> %b) { + %t1 = icmp sgt <4 x i16> %b, %a + %t2 = select <4 x i1> 
%t1, <4 x i16> %a, <4 x i16> %b + ret <4 x i16> %t2 +} + +; CHECK-LABEL: t6 +; CHECK: smax +define <2 x i32> @t6(<2 x i32> %a, <2 x i32> %b) { + %t1 = icmp slt <2 x i32> %b, %a + %t2 = select <2 x i1> %t1, <2 x i32> %a, <2 x i32> %b + ret <2 x i32> %t2 +} + +; CHECK-LABEL: t7 +; CHECK: umin +define <16 x i8> @t7(<16 x i8> %a, <16 x i8> %b) { + %t1 = icmp ugt <16 x i8> %b, %a + %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %t2 +} + +; CHECK-LABEL: t8 +; CHECK: umax +define <8 x i16> @t8(<8 x i16> %a, <8 x i16> %b) { + %t1 = icmp ult <8 x i16> %b, %a + %t2 = select <8 x i1> %t1, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %t2 +} + +; CHECK-LABEL: t9 +; CHECK: umin +; CHECK: smax +define <4 x i32> @t9(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { + %t1 = icmp ugt <4 x i32> %b, %a + %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b + %t3 = icmp sge <4 x i32> %t2, %c + %t4 = select <4 x i1> %t3, <4 x i32> %t2, <4 x i32> %c + ret <4 x i32> %t4 +} + +; CHECK-LABEL: t10 +; CHECK: smax +; CHECK: smax +define <8 x i32> @t10(<8 x i32> %a, <8 x i32> %b) { + %t1 = icmp sgt <8 x i32> %a, %b + %t2 = select <8 x i1> %t1, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %t2 +} + +; CHECK-LABEL: t11 +; CHECK: smin +; CHECK: smin +; CHECK: smin +; CHECK: smin +define <16 x i32> @t11(<16 x i32> %a, <16 x i32> %b) { + %t1 = icmp sle <16 x i32> %a, %b + %t2 = select <16 x i1> %t1, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %t2 +} -- 2.34.1