From 69ea9d230865425ed7422a5c5fe9dbb6ea612433 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Wed, 30 Apr 2008 06:39:11 +0000 Subject: [PATCH] move lowering of llvm.memset -> store from simplify libcalls to instcombine. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@50472 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Scalar/InstructionCombining.cpp | 56 ++++++++++++++++--- test/Transforms/InstCombine/memset.ll | 15 +++++ 2 files changed, 64 insertions(+), 7 deletions(-) create mode 100644 test/Transforms/InstCombine/memset.ll diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 82c67d03d93..507e6f2a854 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -370,6 +370,7 @@ namespace { Instruction *MatchBSwap(BinaryOperator &I); bool SimplifyStoreAtEndOfBlock(StoreInst &SI); Instruction *SimplifyMemTransfer(MemIntrinsic *MI); + Instruction *SimplifyMemSet(MemSetInst *MI); Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned); @@ -8491,7 +8492,9 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // A single load+store correctly handles overlapping memory in the memmove // case. unsigned Size = MemOpLength->getZExtValue(); - if (Size == 0 || Size > 8 || (Size&(Size-1))) + if (Size == 0) return MI; // Delete this mem transfer. + + if (Size > 8 || (Size&(Size-1))) return 0; // If not 1/2/4/8 bytes, exit. // Use an integer load+store unless we can find something better. @@ -8545,6 +8548,48 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { return MI; } +Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { + unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest()); + if (MI->getAlignment()->getZExtValue() < Alignment) { + MI->setAlignment(ConstantInt::get(Type::Int32Ty, Alignment)); + return MI; + } + + // Extract the length and alignment and fill if they are constant. 
+ ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength()); + ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); + if (!LenC || !FillC || FillC->getType() != Type::Int8Ty) + return 0; + uint64_t Len = LenC->getZExtValue(); + Alignment = MI->getAlignment()->getZExtValue(); + + // If the length is zero, this is a no-op + if (Len == 0) return MI; // memset(d,c,0,a) -> noop + + // memset(s,c,n) -> store s, c (for n=1,2,4,8) + if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { + const Type *ITy = IntegerType::get(Len*8); // n=1 -> i8. + + Value *Dest = MI->getDest(); + Dest = InsertBitCastBefore(Dest, PointerType::getUnqual(ITy), *MI); + + // Alignment 0 is identity for alignment 1 for memset, but not store. + if (Alignment == 0) Alignment = 1; + + // Extract the fill value and store. + uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; + InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), Dest, false, + Alignment), *MI); + + // Set the size of the copy to 0, it will be deleted on the next iteration. + MI->setLength(Constant::getNullValue(LenC->getType())); + return MI; + } + + return 0; +} + + /// visitCallInst - CallInst simplification. This mostly only handles folding /// of intrinsic instructions. For normal calls, it allows visitCallSite to do /// the heavy lifting. 
@@ -8592,12 +8637,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) { if (Instruction *I = SimplifyMemTransfer(MI)) return I; - } else if (isa<MemSetInst>(MI)) { - unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest()); - if (MI->getAlignment()->getZExtValue() < Alignment) { - MI->setAlignment(ConstantInt::get(Type::Int32Ty, Alignment)); - Changed = true; - } + } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) { + if (Instruction *I = SimplifyMemSet(MSI)) + return I; } if (Changed) return II; diff --git a/test/Transforms/InstCombine/memset.ll b/test/Transforms/InstCombine/memset.ll new file mode 100644 index 00000000000..27a5b60cade --- /dev/null +++ b/test/Transforms/InstCombine/memset.ll @@ -0,0 +1,15 @@ +; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep {call.*llvm.memset} + +declare void @llvm.memset.i32(i8*, i8, i32, i32) + +define i32 @main() { + %target = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=1] + %target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0 ; <i8*> [#uses=5] + call void @llvm.memset.i32( i8* %target_p, i8 1, i32 0, i32 1 ) + call void @llvm.memset.i32( i8* %target_p, i8 1, i32 1, i32 1 ) + call void @llvm.memset.i32( i8* %target_p, i8 1, i32 2, i32 2 ) + call void @llvm.memset.i32( i8* %target_p, i8 1, i32 4, i32 4 ) + call void @llvm.memset.i32( i8* %target_p, i8 1, i32 8, i32 8 ) + ret i32 0 +} + -- 2.34.1