From 3ad50284cbccee22c827d2b21ac2a4704ee4f90c Mon Sep 17 00:00:00 2001 From: Scott Douglass Date: Mon, 24 Aug 2015 09:17:18 +0000 Subject: [PATCH] [ARM] Use AEABI helpers for i64 div and rem Differential Revision: http://reviews.llvm.org/D12232 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245830 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 63 +++++++++++++++++++++++--- lib/Target/ARM/ARMISelLowering.h | 1 + test/CodeGen/ARM/div.ll | 71 ++++++++++++++++++++++++------ 3 files changed, 117 insertions(+), 18 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index bfc2a9fb89d..ea842a68020 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -738,11 +738,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UDIV, MVT::i32, Expand); } - // FIXME: Also set divmod for SREM on EABI/androideabi setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); // Register based DivRem for AEABI (RTABI 4.2) if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) { + setOperationAction(ISD::SREM, MVT::i64, Custom); + setOperationAction(ISD::UREM, MVT::i64, Custom); + setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod"); setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod"); setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod"); @@ -6716,6 +6718,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SHL: case ISD::SRL: case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); + case ISD::SREM: return LowerREM(Op.getNode(), DAG); + case ISD::UREM: return LowerREM(Op.getNode(), DAG); case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRL_PARTS: case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); @@ -6775,6 +6779,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::SRA: Res = 
Expand64BitShift(N, DAG, Subtarget); break; + case ISD::SREM: + case ISD::UREM: + Res = LowerREM(N, DAG); + break; case ISD::READCYCLECOUNTER: ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; @@ -11103,9 +11111,11 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, static RTLIB::Libcall getDivRemLibcall( const SDNode *N, MVT::SimpleValueType SVT) { - assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM) && + assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM || + N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) && "Unhandled Opcode in getDivRemLibcall"); - bool isSigned = N->getOpcode() == ISD::SDIVREM; + bool isSigned = N->getOpcode() == ISD::SDIVREM || + N->getOpcode() == ISD::SREM; RTLIB::Libcall LC; switch (SVT) { default: llvm_unreachable("Unexpected request for libcall!"); @@ -11119,9 +11129,11 @@ static RTLIB::Libcall getDivRemLibcall( static TargetLowering::ArgListTy getDivRemArgList( const SDNode *N, LLVMContext *Context) { - assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM) && + assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM || + N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) && "Unhandled Opcode in getDivRemArgList"); - bool isSigned = N->getOpcode() == ISD::SDIVREM; + bool isSigned = N->getOpcode() == ISD::SDIVREM || + N->getOpcode() == ISD::SREM; TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { @@ -11168,6 +11180,47 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { return CallInfo.first; } +// Lowers SREM/UREM to a divmod helper call and returns only the remainder; +// see RTABI section 4.2/4.3 +SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const { + // Build the call's return type: a struct of two equal ints (div and rem) + std::vector<Type*> RetTyParams; + Type *RetTyElement; + + switch (N->getValueType(0).getSimpleVT().SimpleTy) { + default: 
llvm_unreachable("Unexpected request for libcall!"); + case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break; + case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break; + case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break; + case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break; + } + + RetTyParams.push_back(RetTyElement); + RetTyParams.push_back(RetTyElement); + ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams); + Type *RetTy = StructType::get(*DAG.getContext(), ret); + + RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT(). + SimpleTy); + SDValue InChain = DAG.getEntryNode(); + TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext()); + bool isSigned = N->getOpcode() == ISD::SREM; + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), + getPointerTy(DAG.getDataLayout())); + + // Lower call + CallLoweringInfo CLI(DAG); + CLI.setChain(InChain) + .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args), 0) + .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N)); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + + // Return second (rem) result operand (first contains div) + SDNode *ResNode = CallResult.first.getNode(); + assert(ResNode->getNumOperands() == 2 && "divmod should return two operands"); + return ResNode->getOperand(1); +} + SDValue ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetWindows() && "unsupported target platform"); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 48e7bbf27b3..a9009de5169 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -522,6 +522,7 @@ namespace llvm { const ARMSubtarget *ST) const; SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerREM(SDNode *N, SelectionDAG &DAG) const; SDValue 
LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll index 7b298fee42a..997f50760f3 100644 --- a/test/CodeGen/ARM/div.ll +++ b/test/CodeGen/ARM/div.ll @@ -1,52 +1,97 @@ -; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-SWDIV -; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=swift | FileCheck %s -check-prefix=CHECK-HWDIV -; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r4 | FileCheck %s -check-prefix=CHECK-SWDIV -; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r4f | FileCheck %s -check-prefix=CHECK-SWDIV -; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r5 | FileCheck %s -check-prefix=CHECK-HWDIV +; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | \ +; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-SWDIV +; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=swift | \ +; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-HWDIV +; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r4 | \ +; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-SWDIV +; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r4f | \ +; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-SWDIV +; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r5 | \ +; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-HWDIV +; RUN: llc < %s -mtriple=arm-none-eabi -mcpu=cortex-a8 | \ +; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-EABI define i32 @f1(i32 %a, i32 %b) { entry: -; CHECK-SWDIV: f1 +; CHECK-LABEL: f1 ; CHECK-SWDIV: __divsi3 -; CHECK-HWDIV: f1 ; CHECK-HWDIV: sdiv + +; CHECK-EABI: __aeabi_idiv %tmp1 = sdiv i32 %a, %b ; [#uses=1] ret i32 %tmp1 } define i32 @f2(i32 %a, i32 %b) { entry: -; CHECK-SWDIV: f2 +; CHECK-LABEL: f2 ; CHECK-SWDIV: __udivsi3 -; CHECK-HWDIV: f2 ; CHECK-HWDIV: udiv + +; CHECK-EABI: __aeabi_uidiv 
%tmp1 = udiv i32 %a, %b ; [#uses=1] ret i32 %tmp1 } define i32 @f3(i32 %a, i32 %b) { entry: -; CHECK-SWDIV: f3 +; CHECK-LABEL: f3 ; CHECK-SWDIV: __modsi3 -; CHECK-HWDIV: f3 ; CHECK-HWDIV: sdiv ; CHECK-HWDIV: mls + +; EABI MODE = Remainder in R1, quotient in R0 +; CHECK-EABI: __aeabi_idivmod +; CHECK-EABI-NEXT: mov r0, r1 %tmp1 = srem i32 %a, %b ; [#uses=1] ret i32 %tmp1 } define i32 @f4(i32 %a, i32 %b) { entry: -; CHECK-SWDIV: f4 +; CHECK-LABEL: f4 ; CHECK-SWDIV: __umodsi3 -; CHECK-HWDIV: f4 ; CHECK-HWDIV: udiv ; CHECK-HWDIV: mls + +; EABI MODE = Remainder in R1, quotient in R0 +; CHECK-EABI: __aeabi_uidivmod +; CHECK-EABI-NEXT: mov r0, r1 %tmp1 = urem i32 %a, %b ; [#uses=1] ret i32 %tmp1 } + +define i64 @f5(i64 %a, i64 %b) { +entry: +; CHECK-LABEL: f5 +; CHECK-SWDIV: __moddi3 + +; CHECK-HWDIV: __moddi3 + +; EABI MODE = Remainder in R2-R3, quotient in R0-R1 +; CHECK-EABI: __aeabi_ldivmod +; CHECK-EABI-NEXT: mov r0, r2 +; CHECK-EABI-NEXT: mov r1, r3 + %tmp1 = srem i64 %a, %b ; [#uses=1] + ret i64 %tmp1 +} + +define i64 @f6(i64 %a, i64 %b) { +entry: +; CHECK-LABEL: f6 +; CHECK-SWDIV: __umoddi3 + +; CHECK-HWDIV: __umoddi3 + +; EABI MODE = Remainder in R2-R3, quotient in R0-R1 +; CHECK-EABI: __aeabi_uldivmod +; CHECK-EABI-NEXT: mov r0, r2 +; CHECK-EABI-NEXT: mov r1, r3 + %tmp1 = urem i64 %a, %b ; [#uses=1] + ret i64 %tmp1 +} -- 2.34.1