#include "llvm/Transforms/Scalar.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
-#include "llvm/ParameterAttributes.h"
+#include "llvm/LLVMContext.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
/// byte store (e.g. i16 0x1234), return null.
-static Value *isBytewiseValue(Value *V) {
+static Value *isBytewiseValue(Value *V, LLVMContext& Context) {
// All byte-wide stores are splatable, even of arbitrary variables.
if (V->getType() == Type::Int8Ty) return V;
// corresponding integer value is "byteable". An important case is 0.0.
if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
if (CFP->getType() == Type::FloatTy)
- V = ConstantExpr::getBitCast(CFP, Type::Int32Ty);
+ V = Context.getConstantExprBitCast(CFP, Type::Int32Ty);
if (CFP->getType() == Type::DoubleTy)
- V = ConstantExpr::getBitCast(CFP, Type::Int64Ty);
+ V = Context.getConstantExprBitCast(CFP, Type::Int64Ty);
// Don't handle long double formats, which have strange constraints.
}
if (Val != Val2)
return 0;
}
- return ConstantInt::get(Val);
+ return ConstantInt::get(Context, Val);
}
}
// Otherwise, we have a sequential type like an array or vector. Multiply
// the index by the ElementSize.
- uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+ uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
Offset += Size*OpC->getSExtValue();
}
// End.
if (End > I->End) {
I->End = End;
- range_iterator NextI = I;;
+ range_iterator NextI = I;
while (++NextI != E && End >= NextI->Start) {
// Merge the range in.
I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- MemCpyOpt() : FunctionPass((intptr_t)&ID) { }
+ MemCpyOpt() : FunctionPass(&ID) {}
private:
// This transformation requires dominator and postdominator info
// Ensure that the value being stored is something that can be memset a byte
// at a time, like "0" or "-1" of any width, as well as things like
// 0xA0A0A0A0 and 0.0.
- Value *ByteVal = isBytewiseValue(SI->getOperand(0));
+ Value *ByteVal = isBytewiseValue(SI->getOperand(0), SI->getContext());
if (!ByteVal)
return false;
if (NextStore->isVolatile()) break;
// Check to see if this stored value is of the same byte-splattable value.
- if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+ if (ByteVal != isBytewiseValue(NextStore->getOperand(0),
+ NextStore->getContext()))
break;
// Check to see if this store is to a constant offset from the start ptr.
// instruction needed by the start of the block.
BasicBlock::iterator InsertPt = BI;
- if (MemSetF == 0)
+ if (MemSetF == 0) {
+ const Type *Tys[] = {Type::Int64Ty};
MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent()
- ->getParent(), Intrinsic::memset_i64);
+ ->getParent(), Intrinsic::memset,
+ Tys, 1);
+ }
// Get the starting pointer of the block.
StartPtr = Range.StartPtr;
// Cast the start ptr to be i8* as memset requires.
- const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty);
+ const Type *i8Ptr = SI->getContext().getPointerTypeUnqual(Type::Int8Ty);
if (StartPtr->getType() != i8Ptr)
StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
InsertPt);
Value *Ops[] = {
StartPtr, ByteVal, // Start, value
- ConstantInt::get(Type::Int64Ty, Range.End-Range.Start), // size
- ConstantInt::get(Type::Int32Ty, Range.Alignment) // align
+ // size
+ ConstantInt::get(Type::Int64Ty, Range.End-Range.Start),
+ // align
+ ConstantInt::get(Type::Int32Ty, Range.Alignment)
};
Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt);
DEBUG(cerr << "Replace stores:\n";
if (!srcArraySize)
return false;
- uint64_t srcSize = TD.getABITypeSize(srcAlloca->getAllocatedType()) *
+ uint64_t srcSize = TD.getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
if (cpyLength->getZExtValue() < srcSize)
if (!destArraySize)
return false;
- uint64_t destSize = TD.getABITypeSize(A->getAllocatedType()) *
+ uint64_t destSize = TD.getTypeAllocSize(A->getAllocatedType()) *
destArraySize->getZExtValue();
if (destSize < srcSize)
return false;
const Type* StructTy = cast<PointerType>(A->getType())->getElementType();
- uint64_t destSize = TD.getABITypeSize(StructTy);
+ uint64_t destSize = TD.getTypeAllocSize(StructTy);
if (destSize < srcSize)
return false;
User* UI = srcUseList.back();
srcUseList.pop_back();
- if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
+ if (isa<BitCastInst>(UI)) {
for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
I != E; ++I)
srcUseList.push_back(*I);
+ } else if (GetElementPtrInst* G = dyn_cast<GetElementPtrInst>(UI)) {
+ if (G->hasAllZeroIndices())
+ for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+ I != E; ++I)
+ srcUseList.push_back(*I);
+ else
+ return false;
} else if (UI != C && UI != cpy) {
return false;
}
return false;
// All the checks have passed, so do the transformation.
+ bool changedArgument = false;
for (unsigned i = 0; i < CS.arg_size(); ++i)
- if (CS.getArgument(i) == cpySrc) {
+ if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
if (cpySrc->getType() != cpyDest->getType())
- cpyDest = CastInst::createPointerCast(cpyDest, cpySrc->getType(),
+ cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
cpyDest->getName(), C);
- CS.setArgument(i, cpyDest);
+ changedArgument = true;
+ if (CS.getArgument(i)->getType() != cpyDest->getType())
+ CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
+ CS.getArgument(i)->getType(), cpyDest->getName(), C));
+ else
+ CS.setArgument(i, cpyDest);
}
+ if (!changedArgument)
+ return false;
+
// Drop any cached information about the call, because we may have changed
// its dependence information by changing its parameter.
MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
- MD.dropInstruction(C);
+ MD.removeInstruction(C);
// Remove the memcpy
MD.removeInstruction(cpy);
// There are two possible optimizations we can do for memcpy:
// a) memcpy-memcpy xform which exposes redundancy for DSE
// b) call-memcpy xform for return slot optimization
- Instruction* dep = MD.getDependency(M);
- if (dep == MemoryDependenceAnalysis::None ||
- dep == MemoryDependenceAnalysis::NonLocal)
+ MemDepResult dep = MD.getDependency(M);
+ if (!dep.isClobber())
return false;
- else if (CallInst* C = dyn_cast<CallInst>(dep))
- return performCallSlotOptzn(M, C);
- else if (!isa<MemCpyInst>(dep))
+ if (!isa<MemCpyInst>(dep.getInst())) {
+ if (CallInst* C = dyn_cast<CallInst>(dep.getInst()))
+ return performCallSlotOptzn(M, C);
return false;
+ }
- MemCpyInst* MDep = cast<MemCpyInst>(dep);
+ MemCpyInst* MDep = cast<MemCpyInst>(dep.getInst());
// We can only transform memcpy's where the dest of one is the source of the
// other
return false;
// If all checks passed, then we can transform these memcpy's
+ const Type *Tys[1];
+ Tys[0] = M->getLength()->getType();
Function* MemCpyFun = Intrinsic::getDeclaration(
M->getParent()->getParent()->getParent(),
- M->getIntrinsicID());
+ M->getIntrinsicID(), Tys, 1);
- std::vector<Value*> args;
- args.push_back(M->getRawDest());
- args.push_back(MDep->getRawSource());
- args.push_back(M->getLength());
- args.push_back(M->getAlignment());
+ Value *Args[4] = {
+ M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst()
+ };
- CallInst* C = CallInst::Create(MemCpyFun, args.begin(), args.end(), "", M);
+ CallInst* C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);
- if (MD.getDependency(C) == MDep) {
- MD.dropInstruction(M);
+
+ // If C and M don't interfere, then this is a valid transformation. If they
+ // did, this would mean that the two sources overlap, which would be bad.
+ if (MD.getDependency(C) == dep) {
+ MD.removeInstruction(M);
M->eraseFromParent();
+ NumMemCpyInstr++;
return true;
}
+ // Otherwise, there was no point in doing this, so we remove the call we
+ // inserted and act like nothing happened.
MD.removeInstruction(C);
C->eraseFromParent();
-
- NumMemCpyInstr++;
-
return false;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I))
changed_function |= processStore(SI, BI);
-
- if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
+ else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
changed_function |= processMemCpy(M);
}
}