From 11add26ec2bdf5109f0ff2ee19d237664687b914 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 11 Nov 2011 23:31:03 +0000 Subject: [PATCH] Add support in fast-isel for selecting memset/memcpy/memmove intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144426 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 70 ++++++++++++++++++---- test/CodeGen/ARM/fast-isel-intrinsic.ll | 78 +++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 10 deletions(-) create mode 100644 test/CodeGen/ARM/fast-isel-intrinsic.ll diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index da2ca3ecd93..4bf55fb8f38 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -164,7 +164,8 @@ class ARMFastISel : public FastISel { bool SelectFPToSI(const Instruction *I); bool SelectSDiv(const Instruction *I); bool SelectSRem(const Instruction *I); - bool SelectCall(const Instruction *I); + bool SelectCall(const Instruction *I, const char *IntrMemName); + bool SelectIntrinsicCall(const IntrinsicInst &I); bool SelectSelect(const Instruction *I); bool SelectRet(const Instruction *I); bool SelectTrunc(const Instruction *I); @@ -1997,12 +1998,13 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { return true; } -bool ARMFastISel::SelectCall(const Instruction *I) { +bool ARMFastISel::SelectCall(const Instruction *I, + const char *IntrMemName = 0) { const CallInst *CI = cast<CallInst>(I); const Value *Callee = CI->getCalledValue(); - // Can't handle inline asm or worry about intrinsics yet. - if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false; + // Can't handle inline asm. + if (isa<InlineAsm>(Callee)) return false; // Only handle global variable Callees. 
const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); @@ -2044,8 +2046,12 @@ bool ARMFastISel::SelectCall(const Instruction *I) { ArgFlags.reserve(CS.arg_size()); for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { - unsigned Arg = getRegForValue(*i); + // If we're lowering a memory intrinsic instead of a regular call, skip the + // last two arguments, which shouldn't be passed to the underlying function. + if (IntrMemName && e-i <= 2) + break; + unsigned Arg = getRegForValue(*i); if (Arg == 0) return false; ISD::ArgFlagsTy Flags; @@ -2090,14 +2096,16 @@ bool ARMFastISel::SelectCall(const Instruction *I) { if(isThumb2) // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc))) - .addGlobalAddress(GV, 0, 0); + TII.get(CallOpc))); else // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc)) - .addGlobalAddress(GV, 0, 0)); - + TII.get(CallOpc))); + if (!IntrMemName) + MIB.addGlobalAddress(GV, 0, 0); + else + MIB.addExternalSymbol(IntrMemName, 0); + // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); @@ -2112,6 +2120,46 @@ bool ARMFastISel::SelectCall(const Instruction *I) { return true; } +bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { + // FIXME: Handle more intrinsics. + switch (I.getIntrinsicID()) { + default: return false; + case Intrinsic::memcpy: + case Intrinsic::memmove: { + // FIXME: Small memcpy/memmove's are common enough that we want to do them + // without a call if possible. + const MemTransferInst &MTI = cast<MemTransferInst>(I); + // Don't handle volatile. + if (MTI.isVolatile()) + return false; + + if (!MTI.getLength()->getType()->isIntegerTy(32)) + return false; + + if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255) + return false; + + const char *IntrMemName = isa<MemCpyInst>(I) ? 
"memcpy" : "memmove"; + return SelectCall(&I, IntrMemName); + } + case Intrinsic::memset: { + const MemSetInst &MSI = cast<MemSetInst>(I); + // Don't handle volatile. + if (MSI.isVolatile()) + return false; + + if (!MSI.getLength()->getType()->isIntegerTy(32)) + return false; + + if (MSI.getDestAddressSpace() > 255) + return false; + + return SelectCall(&I, "memset"); + } + } + return false; +} + bool ARMFastISel::SelectTrunc(const Instruction *I) { // The high bits for a type smaller than the register size are assumed to be // undefined. @@ -2235,6 +2283,8 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { case Instruction::SRem: return SelectSRem(I); case Instruction::Call: + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + return SelectIntrinsicCall(*II); return SelectCall(I); case Instruction::Select: return SelectSelect(I); diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll new file mode 100644 index 00000000000..1954eccc5f9 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -0,0 +1,78 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB + +@message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1 +@temp = common global [60 x i8] zeroinitializer, align 1 + +define void @t1() nounwind ssp { +; ARM: t1 +; ARM: ldr r0, LCPI0_0 +; ARM: add r0, r0, #5 +; ARM: movw r1, #64 +; ARM: movw r2, #10 +; ARM: uxtb r1, r1 +; ARM: bl #14 +; THUMB: t1 +; THUMB: ldr.n r0, LCPI0_0 +; THUMB: adds r0, #5 +; THUMB: movs r1, #64 +; THUMB: movt r1, #0 +; THUMB: movs r2, #10 +; THUMB: movt r2, #0 +; THUMB: uxtb r1, r1 +; THUMB: bl _memset + call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* 
@message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false) + ret void +} + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind + +define void @t2() nounwind ssp { +; ARM: t2 +; ARM: ldr r0, LCPI1_0 +; ARM: ldr r0, [r0] +; ARM: add r1, r0, #4 +; ARM: add r0, r0, #16 +; ARM: movw r2, #10 +; ARM: str r0, [sp] @ 4-byte Spill +; ARM: mov r0, r1 +; ARM: ldr r1, [sp] @ 4-byte Reload +; ARM: bl #14 +; THUMB: t2 +; THUMB: ldr.n r0, LCPI1_0 +; THUMB: ldr r0, [r0] +; THUMB: adds r1, r0, #4 +; THUMB: adds r0, #16 +; THUMB: movs r2, #10 +; THUMB: movt r2, #0 +; THUMB: mov r0, r1 +; THUMB: bl _memcpy + call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + +define void @t3() nounwind ssp { +; ARM: t3 +; ARM: ldr r0, LCPI2_0 +; ARM: ldr r0, [r0] +; ARM: add r1, r0, #4 +; ARM: add r0, r0, #16 +; ARM: movw r2, #10 +; ARM: mov r0, r1 +; ARM: bl #14 +; THUMB: t3 +; THUMB: ldr.n r0, LCPI2_0 +; THUMB: ldr r0, [r0] +; THUMB: adds r1, r0, #4 +; THUMB: adds r0, #16 +; THUMB: movs r2, #10 +; THUMB: movt r2, #0 +; THUMB: mov r0, r1 +; THUMB: bl _memmove + call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) + ret void +} + +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -- 2.34.1