//
// The LLVM Compiler Infrastructure
//
-// This file was developed by the Owen Anderson and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
+#include "llvm/ParameterAttributes.h"
#include "llvm/Value.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetData.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI,
FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,
- PTRTOINT, INTTOPTR, BITCAST, GEP, EMPTY,
+ PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, EMPTY,
TOMBSTONE };
ExpressionOpcode opcode;
uint32_t secondVN;
uint32_t thirdVN;
SmallVector<uint32_t, 4> varargs;
+ Value* function;
Expression() { }
Expression(ExpressionOpcode o) : opcode(o) { }
return true;
else if (type != other.type)
return false;
+ else if (function != other.function)
+ return false;
else if (firstVN != other.firstVN)
return false;
else if (secondVN != other.secondVN)
return false;
else if (type != other.type)
return true;
+ else if (function != other.function)
+ return true;
else if (firstVN != other.firstVN)
return true;
else if (secondVN != other.secondVN)
private:
DenseMap<Value*, uint32_t> valueNumbering;
DenseMap<Expression, uint32_t> expressionNumbering;
+ AliasAnalysis* AA;
uint32_t nextValueNumber;
Expression create_expression(SelectInst* V);
Expression create_expression(CastInst* C);
Expression create_expression(GetElementPtrInst* G);
+ Expression create_expression(CallInst* C);
public:
- ValueTable() { nextValueNumber = 1; }
+ ValueTable() : nextValueNumber(1) { }
uint32_t lookup_or_add(Value* V);
uint32_t lookup(Value* V) const;
void add(Value* V, uint32_t num);
void clear();
void erase(Value* v);
unsigned size();
+ void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
+ uint32_t hash_operand(Value* v);
};
}
E = e.varargs.end(); I != E; ++I)
hash = *I + hash * 37;
+ hash = (unsigned)((uintptr_t)e.function >> 4) ^
+ (unsigned)((uintptr_t)e.function >> 9) +
+ hash * 37;
+
return hash;
}
static bool isEqual(const Expression &LHS, const Expression &RHS) {
}
}
+uint32_t ValueTable::hash_operand(Value* v) {
+ if (CallInst* CI = dyn_cast<CallInst>(v))
+ if (!AA->doesNotAccessMemory(CI))
+ return nextValueNumber++;
+
+ return lookup_or_add(v);
+}
+
+Expression ValueTable::create_expression(CallInst* C) {
+ Expression e;
+
+ e.type = C->getType();
+ e.firstVN = 0;
+ e.secondVN = 0;
+ e.thirdVN = 0;
+ e.function = C->getCalledFunction();
+ e.opcode = Expression::CALL;
+
+ for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
+ I != E; ++I)
+ e.varargs.push_back(hash_operand(*I));
+
+ return e;
+}
+
Expression ValueTable::create_expression(BinaryOperator* BO) {
Expression e;
- e.firstVN = lookup_or_add(BO->getOperand(0));
- e.secondVN = lookup_or_add(BO->getOperand(1));
+ e.firstVN = hash_operand(BO->getOperand(0));
+ e.secondVN = hash_operand(BO->getOperand(1));
e.thirdVN = 0;
+ e.function = 0;
e.type = BO->getType();
e.opcode = getOpcode(BO);
Expression ValueTable::create_expression(CmpInst* C) {
Expression e;
- e.firstVN = lookup_or_add(C->getOperand(0));
- e.secondVN = lookup_or_add(C->getOperand(1));
+ e.firstVN = hash_operand(C->getOperand(0));
+ e.secondVN = hash_operand(C->getOperand(1));
e.thirdVN = 0;
+ e.function = 0;
e.type = C->getType();
e.opcode = getOpcode(C);
Expression ValueTable::create_expression(CastInst* C) {
Expression e;
- e.firstVN = lookup_or_add(C->getOperand(0));
+ e.firstVN = hash_operand(C->getOperand(0));
e.secondVN = 0;
e.thirdVN = 0;
+ e.function = 0;
e.type = C->getType();
e.opcode = getOpcode(C);
Expression ValueTable::create_expression(ShuffleVectorInst* S) {
Expression e;
- e.firstVN = lookup_or_add(S->getOperand(0));
- e.secondVN = lookup_or_add(S->getOperand(1));
- e.thirdVN = lookup_or_add(S->getOperand(2));
+ e.firstVN = hash_operand(S->getOperand(0));
+ e.secondVN = hash_operand(S->getOperand(1));
+ e.thirdVN = hash_operand(S->getOperand(2));
+ e.function = 0;
e.type = S->getType();
e.opcode = Expression::SHUFFLE;
Expression ValueTable::create_expression(ExtractElementInst* E) {
Expression e;
- e.firstVN = lookup_or_add(E->getOperand(0));
- e.secondVN = lookup_or_add(E->getOperand(1));
+ e.firstVN = hash_operand(E->getOperand(0));
+ e.secondVN = hash_operand(E->getOperand(1));
e.thirdVN = 0;
+ e.function = 0;
e.type = E->getType();
e.opcode = Expression::EXTRACT;
Expression ValueTable::create_expression(InsertElementInst* I) {
Expression e;
- e.firstVN = lookup_or_add(I->getOperand(0));
- e.secondVN = lookup_or_add(I->getOperand(1));
- e.thirdVN = lookup_or_add(I->getOperand(2));
+ e.firstVN = hash_operand(I->getOperand(0));
+ e.secondVN = hash_operand(I->getOperand(1));
+ e.thirdVN = hash_operand(I->getOperand(2));
+ e.function = 0;
e.type = I->getType();
e.opcode = Expression::INSERT;
Expression ValueTable::create_expression(SelectInst* I) {
Expression e;
- e.firstVN = lookup_or_add(I->getCondition());
- e.secondVN = lookup_or_add(I->getTrueValue());
- e.thirdVN = lookup_or_add(I->getFalseValue());
+ e.firstVN = hash_operand(I->getCondition());
+ e.secondVN = hash_operand(I->getTrueValue());
+ e.thirdVN = hash_operand(I->getFalseValue());
+ e.function = 0;
e.type = I->getType();
e.opcode = Expression::SELECT;
Expression ValueTable::create_expression(GetElementPtrInst* G) {
Expression e;
- e.firstVN = lookup_or_add(G->getPointerOperand());
+ e.firstVN = hash_operand(G->getPointerOperand());
e.secondVN = 0;
e.thirdVN = 0;
+ e.function = 0;
e.type = G->getType();
e.opcode = Expression::GEP;
for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
I != E; ++I)
- e.varargs.push_back(lookup_or_add(*I));
+ e.varargs.push_back(hash_operand(*I));
return e;
}
if (VI != valueNumbering.end())
return VI->second;
-
- if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) {
+ if (CallInst* C = dyn_cast<CallInst>(V)) {
+ if (AA->onlyReadsMemory(C)) { // includes doesNotAccessMemory
+ Expression e = create_expression(C);
+
+ DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
+ if (EI != expressionNumbering.end()) {
+ valueNumbering.insert(std::make_pair(V, EI->second));
+ return EI->second;
+ } else {
+ expressionNumbering.insert(std::make_pair(e, nextValueNumber));
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+
+ return nextValueNumber++;
+ }
+ } else {
+ valueNumbering.insert(std::make_pair(V, nextValueNumber));
+ return nextValueNumber++;
+ }
+ } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) {
Expression e = create_expression(BO);
DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);
AU.setPreservesCFG();
AU.addRequired<DominatorTree>();
AU.addRequired<MemoryDependenceAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetData>();
+ AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
+ AU.addPreserved<TargetData>();
}
// Helper fuctions
SmallVector<Instruction*, 4>& toErase);
bool processNonLocalLoad(LoadInst* L,
SmallVector<Instruction*, 4>& toErase);
+ bool processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
+ SmallVector<Instruction*, 4>& toErase);
+ bool performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
+ SmallVector<Instruction*, 4>& toErase);
Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
DenseMap<BasicBlock*, Value*> &Phis,
bool top_level = false);
PN->addIncoming(val, *PI);
}
+ AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+ AA.copyValue(orig, PN);
// Attempt to collapse PHI nodes that are trivially redundant
Value* v = CollapsePhi(PN);
dep = MD.getDependency(L, dep);
}
}
-
+
+ if (dep != MemoryDependenceAnalysis::None &&
+ dep != MemoryDependenceAnalysis::NonLocal &&
+ isa<AllocationInst>(dep)) {
+ // Check that this load is actually from the
+ // allocation we found
+ Value* v = L->getOperand(0);
+ while (true) {
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(v))
+ v = BC->getOperand(0);
+ else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(v))
+ v = GEP->getOperand(0);
+ else
+ break;
+ }
+ if (v == dep) {
+ // If this load depends directly on an allocation, there isn't
+ // anything stored there; therefore, we can optimize this load
+ // to undef.
+ MD.removeInstruction(L);
+
+ L->replaceAllUsesWith(UndefValue::get(L->getType()));
+ toErase.push_back(L);
+ deletedLoad = true;
+ NumGVNLoad++;
+ }
+ }
+
if (!deletedLoad)
last = L;
return deletedLoad;
}
+/// isReturnSlotOptznProfitable - Determine if performing a return slot
+/// fusion with the slot dest is profitable
+static bool isReturnSlotOptznProfitable(Value* dest, MemCpyInst* cpy) {
+ // We currently consider it profitable if dest is otherwise dead.
+ SmallVector<User*, 8> useList(dest->use_begin(), dest->use_end());
+ while (!useList.empty()) {
+ User* UI = useList.back();
+
+ if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
+ useList.pop_back();
+ for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+ I != E; ++I)
+ useList.push_back(*I);
+ } else if (UI == cpy)
+ useList.pop_back();
+ else
+ return false;
+ }
+
+ return true;
+}
+
+/// performReturnSlotOptzn - takes a memcpy and a call that it depends on,
+/// and checks for the possibility of a return slot optimization by having
+/// the call write its result directly into the callees return parameter
+/// rather than using memcpy
+bool GVN::performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
+ SmallVector<Instruction*, 4>& toErase) {
+ // Deliberately get the source and destination with bitcasts stripped away,
+ // because we'll need to do type comparisons based on the underlying type.
+ Value* cpyDest = cpy->getDest();
+ Value* cpySrc = cpy->getSource();
+ CallSite CS = CallSite::get(C);
+
+ // Since this is a return slot optimization, we need to make sure that
+ // the value being copied is, in fact, in a return slot. We also need to
+ // check that the return slot parameter is marked noalias, so that we can
+ // be sure that changing it will not cause unexpected behavior changes due
+ // to it being accessed through a global or another parameter.
+ if (CS.arg_size() == 0 ||
+ cpySrc != CS.getArgument(0) ||
+ !CS.paramHasAttr(1, ParamAttr::NoAlias | ParamAttr::StructRet))
+ return false;
+
+ // Check that something sneaky is not happening involving casting
+ // return slot types around.
+ if (CS.getArgument(0)->getType() != cpyDest->getType())
+ return false;
+ // sret --> pointer
+ const PointerType* PT = cast<PointerType>(cpyDest->getType());
+
+ // We can only perform the transformation if the size of the memcpy
+ // is constant and equal to the size of the structure.
+ ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
+ if (!cpyLength)
+ return false;
+
+ TargetData& TD = getAnalysis<TargetData>();
+ if (TD.getTypeStoreSize(PT->getElementType()) != cpyLength->getZExtValue())
+ return false;
+
+ // We only perform the transformation if it will be profitable.
+ if (!isReturnSlotOptznProfitable(cpyDest, cpy))
+ return false;
+
+ // In addition to knowing that the call does not access the return slot
+ // in some unexpected manner, which we derive from the noalias attribute,
+ // we also need to know that it does not sneakily modify the destination
+ // slot in the caller. We don't have parameter attributes to go by
+ // for this one, so we just rely on AA to figure it out for us.
+ AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+ if (AA.getModRefInfo(C, cpy->getRawDest(), cpyLength->getZExtValue()) !=
+ AliasAnalysis::NoModRef)
+ return false;
+
+ // If all the checks have passed, then we're alright to do the transformation.
+ CS.setArgument(0, cpyDest);
+
+ // Drop any cached information about the call, because we may have changed
+ // its dependence information by changing its parameter.
+ MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+ MD.dropInstruction(C);
+
+ // Remove the memcpy
+ MD.removeInstruction(cpy);
+ toErase.push_back(cpy);
+
+ return true;
+}
+
+/// processMemCpy - perform simplication of memcpy's. If we have memcpy A which
+/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
+/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
+/// This allows later passes to remove the first memcpy altogether.
+bool GVN::processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
+ SmallVector<Instruction*, 4>& toErase) {
+ // We can only transforms memcpy's where the dest of one is the source of the
+ // other
+ if (M->getSource() != MDep->getDest())
+ return false;
+
+ // Second, the length of the memcpy's must be the same, or the preceeding one
+ // must be larger than the following one.
+ ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength());
+ ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength());
+ if (!C1 || !C2)
+ return false;
+
+ uint64_t CpySize = C1->getValue().getZExtValue();
+ uint64_t DepSize = C2->getValue().getZExtValue();
+
+ if (DepSize < CpySize)
+ return false;
+
+ // Finally, we have to make sure that the dest of the second does not
+ // alias the source of the first
+ AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+ if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
+ AliasAnalysis::NoAlias)
+ return false;
+ else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
+ AliasAnalysis::NoAlias)
+ return false;
+ else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
+ != AliasAnalysis::NoAlias)
+ return false;
+
+ // If all checks passed, then we can transform these memcpy's
+ Function* MemCpyFun = Intrinsic::getDeclaration(
+ M->getParent()->getParent()->getParent(),
+ M->getIntrinsicID());
+
+ std::vector<Value*> args;
+ args.push_back(M->getRawDest());
+ args.push_back(MDep->getRawSource());
+ args.push_back(M->getLength());
+ args.push_back(M->getAlignment());
+
+ CallInst* C = new CallInst(MemCpyFun, args.begin(), args.end(), "", M);
+
+ MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+ if (MD.getDependency(C) == MDep) {
+ MD.dropInstruction(M);
+ toErase.push_back(M);
+ return true;
+ } else {
+ MD.removeInstruction(C);
+ toErase.push_back(C);
+ return false;
+ }
+}
+
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction* I,
SmallVector<Instruction*, 4>& toErase) {
if (LoadInst* L = dyn_cast<LoadInst>(I)) {
return processLoad(L, lastSeenLoad, toErase);
+ } else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
+ MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+
+ // The are two possible optimizations we can do for memcpy:
+ // a) memcpy-memcpy xform which exposes redundance for DSE
+ // b) call-memcpy xform for sret return slot optimization
+ Instruction* dep = MD.getDependency(M);
+ if (dep == MemoryDependenceAnalysis::None ||
+ dep == MemoryDependenceAnalysis::NonLocal)
+ return false;
+ if (MemCpyInst *MemCpy = dyn_cast<MemCpyInst>(dep))
+ return processMemCpy(M, MemCpy, toErase);
+ if (CallInst* C = dyn_cast<CallInst>(dep))
+ return performReturnSlotOptzn(M, C, toErase);
+ return false;
}
unsigned num = VN.lookup_or_add(I);
} else if (currAvail.test(num)) {
Value* repl = find_leader(currAvail, num);
+ if (CallInst* CI = dyn_cast<CallInst>(I)) {
+ AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+ if (!AA.doesNotAccessMemory(CI)) {
+ MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+ if (cast<Instruction>(repl)->getParent() != CI->getParent() ||
+ MD.getDependency(CI) != MD.getDependency(cast<CallInst>(repl))) {
+ // There must be an intervening may-alias store, so nothing from
+ // this point on will be able to be replaced with the preceding call
+ currAvail.erase(repl);
+ currAvail.insert(I);
+
+ return false;
+ }
+ }
+ }
+
+ // Remove it!
+ MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+ MD.removeInstruction(I);
+
VN.erase(I);
I->replaceAllUsesWith(repl);
toErase.push_back(I);
// function.
//
bool GVN::runOnFunction(Function& F) {
+ VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
+
bool changed = false;
bool shouldContinue = true;
// Avoid iterator invalidation
++BI;
-
+
for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
- E = toErase.end(); I != E; ++I)
+ E = toErase.end(); I != E; ++I) {
(*I)->eraseFromParent();
-
+ }
+
toErase.clear();
}
}