From 0cc6b875835371b2e2d8eaa716e2a57808c5232e Mon Sep 17 00:00:00 2001 From: Vasileios Kalintiris Date: Mon, 1 Jun 2015 16:36:01 +0000 Subject: [PATCH] [mips][FastISel] Implement intrinsics memset, memcpy & memmove. Summary: Implement the intrinsics memset, memcpy and memmove in MIPS FastISel. Make some needed infrastructure fixes so that this can work. Based on a patch by Reed Kotler. Test Plan: memtest1.ll The patch passes test-suite for mips32 r1/r2 and at O0/O2 Reviewers: rkotler, dsanders Subscribers: llvm-commits, rfuhler Differential Revision: http://reviews.llvm.org/D7158 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238759 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsFastISel.cpp | 96 +++++++++++++++++++++++-- test/CodeGen/Mips/Fast-ISel/memtest1.ll | 74 +++++++++++++++++++ 2 files changed, 163 insertions(+), 7 deletions(-) create mode 100644 test/CodeGen/Mips/Fast-ISel/memtest1.ll diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 027157527f6..7d686234711 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -82,6 +82,7 @@ class MipsFastISel final : public FastISel { LLVMContext *Context; bool fastLowerCall(CallLoweringInfo &CLI) override; + bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; bool TargetSupported; bool UnsupportedFPMode; // To allow fast-isel to proceed and just not handle @@ -142,6 +143,7 @@ private: unsigned materializeGV(const GlobalValue *GV, MVT VT); unsigned materializeInt(const Constant *C, MVT VT); unsigned materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC); + unsigned materializeExternalCallSym(const char *SynName); MachineInstrBuilder emitInst(unsigned Opc) { return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); @@ -367,6 +369,15 @@ unsigned MipsFastISel::materializeGV(const GlobalValue *GV, MVT VT) { return DestReg; } +unsigned MipsFastISel::materializeExternalCallSym(const char *SymName) 
{ + const TargetRegisterClass *RC = &Mips::GPR32RegClass; + unsigned DestReg = createResultReg(RC); + emitInst(Mips::LW, DestReg) + .addReg(MFI->getGlobalBaseReg()) + .addExternalSymbol(SymName, MipsII::MO_GOT); + return DestReg; +} + // Materialize a constant into a register, and return the register // number (or zero if we failed to handle it). unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) { @@ -471,15 +482,51 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) { } bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) { - const GlobalValue *GV = dyn_cast(V); - if (GV && isa(GV) && cast(GV)->isIntrinsic()) - return false; - if (!GV) - return false; + const User *U = nullptr; + unsigned Opcode = Instruction::UserOp1; + + if (const auto *I = dyn_cast(V)) { + // Check if the value is defined in the same basic block. This information + // is crucial to know whether or not folding an operand is valid. + if (I->getParent() == FuncInfo.MBB->getBasicBlock()) { + Opcode = I->getOpcode(); + U = I; + } + } else if (const auto *C = dyn_cast(V)) { + Opcode = C->getOpcode(); + U = C; + } + + switch (Opcode) { + default: + break; + case Instruction::BitCast: + // Look past bitcasts if its operand is in the same BB. + return computeCallAddress(U->getOperand(0), Addr); + break; + case Instruction::IntToPtr: + // Look past no-op inttoptrs if its operand is in the same BB. + if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + return computeCallAddress(U->getOperand(0), Addr); + break; + case Instruction::PtrToInt: + // Look past no-op ptrtoints if its operand is in the same BB. + if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + return computeCallAddress(U->getOperand(0), Addr); + break; + } + if (const GlobalValue *GV = dyn_cast(V)) { Addr.setGlobalValue(GV); return true; } + + // If all else fails, try to materialize the value in a register. 
+ if (!Addr.getGlobalValue()) { + Addr.setReg(getRegForValue(V)); + return Addr.getReg() != 0; + } + return false; } @@ -1187,7 +1234,7 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { bool IsTailCall = CLI.IsTailCall; bool IsVarArg = CLI.IsVarArg; const Value *Callee = CLI.Callee; - // const char *SymName = CLI.SymName; + const char *SymName = CLI.SymName; // Allow SelectionDAG isel to handle tail calls. if (IsTailCall) @@ -1234,8 +1281,15 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { if (!processCallArgs(CLI, OutVTs, NumBytes)) return false; + if (!Addr.getGlobalValue()) + return false; + // Issue the call. - unsigned DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32); + unsigned DestAddress; + if (SymName) + DestAddress = materializeExternalCallSym(SymName); + else + DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32); emitInst(TargetOpcode::COPY, Mips::T9).addReg(DestAddress); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::JALR), @@ -1255,6 +1309,34 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { return finishCall(CLI, RetVT, NumBytes); } +bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { + switch (II->getIntrinsicID()) { + default: + return false; + case Intrinsic::memcpy: + case Intrinsic::memmove: { + const auto *MTI = cast(II); + // Don't handle volatile. + if (MTI->isVolatile()) + return false; + if (!MTI->getLength()->getType()->isIntegerTy(32)) + return false; + const char *IntrMemName = isa(II) ? "memcpy" : "memmove"; + return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2); + } + case Intrinsic::memset: { + const MemSetInst *MSI = cast(II); + // Don't handle volatile. 
+ if (MSI->isVolatile()) + return false; + if (!MSI->getLength()->getType()->isIntegerTy(32)) + return false; + return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); + } + } + return false; +} + bool MipsFastISel::selectRet(const Instruction *I) { const Function &F = *I->getParent()->getParent(); const ReturnInst *Ret = cast(I); diff --git a/test/CodeGen/Mips/Fast-ISel/memtest1.ll b/test/CodeGen/Mips/Fast-ISel/memtest1.ll new file mode 100644 index 00000000000..a3fc4a32981 --- /dev/null +++ b/test/CodeGen/Mips/Fast-ISel/memtest1.ll @@ -0,0 +1,74 @@ +; RUN: llc < %s -march=mipsel -mcpu=mips32 -O0 -relocation-model=pic \ +; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=32R1 +; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 -relocation-model=pic \ +; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=32R2 + +@str = private unnamed_addr constant [12 x i8] c"hello there\00", align 1 +@src = global i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str, i32 0, i32 0), align 4 +@i = global i32 12, align 4 +@dest = common global [50 x i8] zeroinitializer, align 1 + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) + +define void @cpy(i8* %src, i32 %i) { + ; ALL-LABEL: cpy: + + ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) + ; ALL-DAG: sw $4, 24($sp) + ; ALL-DAG: move $4, $[[T0]] + ; ALL-DAG: sw $5, 20($sp) + ; ALL-DAG: lw $[[T1:[0-9]+]], 24($sp) + ; ALL-DAG: move $5, $[[T1]] + ; ALL-DAG: lw $6, 20($sp) + ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memcpy)(${{[0-9]+}}) + ; ALL: jalr $[[T2]] + ; ALL-NEXT: nop + ; ALL-NOT: {{.*}}$2{{.*}} + call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 
0, i32 0), + i8* %src, i32 %i, i32 1, i1 false) + ret void +} + +define void @mov(i8* %src, i32 %i) { + ; ALL-LABEL: mov: + + + ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) + ; ALL-DAG: sw $4, 24($sp) + ; ALL-DAG: move $4, $[[T0]] + ; ALL-DAG: sw $5, 20($sp) + ; ALL-DAG: lw $[[T1:[0-9]+]], 24($sp) + ; ALL-DAG: move $5, $[[T1]] + ; ALL-DAG: lw $6, 20($sp) + ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memmove)(${{[0-9]+}}) + ; ALL: jalr $[[T2]] + ; ALL-NEXT: nop + ; ALL-NOT: {{.*}}$2{{.*}} + call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0), + i8* %src, i32 %i, i32 1, i1 false) + ret void +} + +define void @clear(i32 %i) { + ; ALL-LABEL: clear: + + ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) + ; ALL-DAG: sw $4, 16($sp) + ; ALL-DAG: move $4, $[[T0]] + ; ALL-DAG: addiu $[[T1:[0-9]+]], $zero, 42 + ; 32R1-DAG: sll $[[T2:[0-9]+]], $[[T1]], 24 + ; 32R1-DAG: sra $5, $[[T2]], 24 + ; 32R2-DAG: seb $5, $[[T1]] + ; ALL-DAG: lw $6, 16($sp) + ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memset)(${{[0-9]+}}) + ; ALL: jalr $[[T2]] + ; ALL-NEXT: nop + ; ALL-NOT: {{.*}}$2{{.*}} + call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0), + i8 42, i32 %i, i32 1, i1 false) + ret void +} -- 2.34.1