X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FGVN.cpp;h=a9445dd853024f9fd8ac8a0c321dcb9a09a1d147;hb=88ffdddcc42d80972644643da1096793dabae802;hp=077fa67e0bd0ae16a68fcacad662b4260fd04dd8;hpb=88365bb4047a91d42af2aa42839b21e701ea1b03;p=oota-llvm.git

diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 077fa67e0bd..a9445dd8530 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -13,7 +13,6 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "gvn"
-
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/Constants.h"
@@ -33,10 +32,23 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Target/TargetData.h"
 using namespace llvm;
 
+STATISTIC(NumGVNInstr, "Number of instructions deleted");
+STATISTIC(NumGVNLoad, "Number of loads deleted");
+STATISTIC(NumMemSetInfer, "Number of memsets inferred");
+
+namespace {
+  cl::opt<bool>
+  FormMemSet("form-memset-from-stores",
+             cl::desc("Transform straight-line stores to memsets"),
+             cl::init(false), cl::Hidden);
+}
+
 //===----------------------------------------------------------------------===//
 //                         ValueTable Class
 //===----------------------------------------------------------------------===//
@@ -664,18 +676,19 @@ namespace {
     Value* find_leader(ValueNumberedSet& vals, uint32_t v);
     void val_insert(ValueNumberedSet& s, Value* v);
     bool processLoad(LoadInst* L,
-                     DenseMap<Value*, LoadInst*>& lastLoad,
-                     SmallVector<Instruction*, 4>& toErase);
+                     DenseMap<Value*, LoadInst*> &lastLoad,
+                     SmallVectorImpl<Instruction*> &toErase);
+    bool processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase);
     bool processInstruction(Instruction* I, ValueNumberedSet& currAvail,
                             DenseMap<Value*, LoadInst*>& lastSeenLoad,
-                            SmallVector<Instruction*, 4>& toErase);
+                            SmallVectorImpl<Instruction*> &toErase);
     bool processNonLocalLoad(LoadInst* L,
-                             SmallVector<Instruction*, 4>& toErase);
+                             SmallVectorImpl<Instruction*> &toErase);
     bool processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
-                       SmallVector<Instruction*, 4>& toErase);
+                       SmallVectorImpl<Instruction*> &toErase);
     bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C,
-                              SmallVector<Instruction*, 4>& toErase);
+                              SmallVectorImpl<Instruction*> &toErase);
     Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
                             DenseMap<BasicBlock*, Value*> &Phis,
                             bool top_level = false);
 
@@ -694,9 +707,6 @@
 FunctionPass *llvm::createGVNPass() { return new GVN(); }
 
 static RegisterPass<GVN> X("gvn", "Global Value Numbering");
-STATISTIC(NumGVNInstr, "Number of instructions deleted");
-STATISTIC(NumGVNLoad, "Number of loads deleted");
-
 /// find_leader - Given a set and a value number, return the first
 /// element of the set with that value number, or 0 if no such element
 /// is present
@@ -824,7 +834,7 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, LoadInst* orig,
 /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
 /// non-local by performing PHI construction.
 bool GVN::processNonLocalLoad(LoadInst* L,
-                              SmallVector<Instruction*, 4>& toErase) {
+                              SmallVectorImpl<Instruction*> &toErase) {
   MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
 
   // Find the non-local dependencies of the load
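A side note on the signature changes in the class declaration above: taking SmallVectorImpl<Instruction*>& instead of SmallVector<Instruction*, 4>& lets each caller pick its own inline capacity without changing the callee. A minimal standalone sketch of the idiom (the function and variable names below are illustrative, not from this patch):

#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// SmallVectorImpl<T>& is the size-erased base of every SmallVector<T, N>, so a
// callee written against it accepts callers with any inline capacity N.
static void collectWork(SmallVectorImpl<int> &Out) {
  Out.push_back(1);
  Out.push_back(2);
}

void useCollectWork() {
  SmallVector<int, 4> Small;   // one caller keeps 4 elements inline
  SmallVector<int, 64> Big;    // another reserves 64; both bind to the same API
  collectWork(Small);
  collectWork(Big);
}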
@@ -884,9 +894,8 @@ bool GVN::processNonLocalLoad(LoadInst* L,
 
 /// processLoad - Attempt to eliminate a load, first by eliminating it
 /// locally, and then attempting non-local elimination if that fails.
-bool GVN::processLoad(LoadInst* L,
-                      DenseMap<Value*, LoadInst*>& lastLoad,
-                      SmallVector<Instruction*, 4>& toErase) {
+bool GVN::processLoad(LoadInst *L, DenseMap<Value*, LoadInst*> &lastLoad,
+                      SmallVectorImpl<Instruction*> &toErase) {
   if (L->isVolatile()) {
     lastLoad[L->getPointerOperand()] = L;
     return false;
@@ -984,11 +993,256 @@ bool GVN::processLoad(LoadInst* L,
   return deletedLoad;
 }
 
+/// isBytewiseValue - If the specified value can be set by repeating the same
+/// byte in memory, return the i8 value that it is represented with.  This is
+/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
+/// i16 0xF0F0, double 0.0 etc.  If the value can't be handled with a repeated
+/// byte store (e.g. i16 0x1234), return null.
+static Value *isBytewiseValue(Value *V) {
+  // All byte-wide stores are splatable, even of arbitrary variables.
+  if (V->getType() == Type::Int8Ty) return V;
+
+  // Constant float and double values can be handled as integer values if the
+  // corresponding integer value is "byteable".  An important case is 0.0.
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+    if (CFP->getType() == Type::FloatTy)
+      V = ConstantExpr::getBitCast(CFP, Type::Int32Ty);
+    if (CFP->getType() == Type::DoubleTy)
+      V = ConstantExpr::getBitCast(CFP, Type::Int64Ty);
+    // Don't handle long double formats, which have strange constraints.
+  }
+
+  // We can handle constant integers that are power of two in size and a
+  // multiple of 8 bits.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+    unsigned Width = CI->getBitWidth();
+    if (isPowerOf2_32(Width) && Width > 8) {
+      // We can handle this value if the recursive binary decomposition is the
+      // same at all levels.
+      APInt Val = CI->getValue();
+      APInt Val2;
+      while (Val.getBitWidth() != 8) {
+        unsigned NextWidth = Val.getBitWidth()/2;
+        Val2 = Val.lshr(NextWidth);
+        Val2.trunc(Val.getBitWidth()/2);
+        Val.trunc(Val.getBitWidth()/2);
+
+        // If the top/bottom halves aren't the same, reject it.
+        if (Val != Val2)
+          return 0;
+      }
+      return ConstantInt::get(Val);
+    }
+  }
+
+  // Conceptually, we could handle things like:
+  //   %a = zext i8 %X to i16
+  //   %b = shl i16 %a, 8
+  //   %c = or i16 %a, %b
+  // but until there is an example that actually needs this, it doesn't seem
+  // worth worrying about.
+  return 0;
+}
+
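The halving loop in isBytewiseValue above repeatedly compares the top and bottom halves of the constant until only one byte is left. The same check on a plain 64-bit integer, as a self-contained sketch (this helper is illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

// Returns true if the Width-bit value V (Width a power of two, 8..64) consists
// of a single byte repeated, i.e. it could be materialized by a memset.
static bool isRepeatedByte(uint64_t V, unsigned Width) {
  while (Width != 8) {
    unsigned Half = Width / 2;                 // Half <= 32 given Width <= 64
    uint64_t Mask = (1ULL << Half) - 1;
    uint64_t Lo = V & Mask;
    uint64_t Hi = (V >> Half) & Mask;
    if (Lo != Hi)
      return false;                            // halves differ, e.g. 0x1234
    V = Lo;
    Width = Half;
  }
  return true;
}

int main() {
  assert(isRepeatedByte(0xA0A0A0A0ULL, 32));   // splats to the byte 0xA0
  assert(isRepeatedByte(0xFFFFFFFFULL, 32));   // i32 -1 splats to 0xFF
  assert(!isRepeatedByte(0x1234ULL, 16));      // rejected, like i16 0x1234
  return 0;
}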
+static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx,
+                                  bool &VariableIdxFound, TargetData &TD) {
+  // Skip over the first indices.
+  gep_type_iterator GTI = gep_type_begin(GEP);
+  for (unsigned i = 1; i != Idx; ++i, ++GTI)
+    /*skip along*/;
+
+  // Compute the offset implied by the rest of the indices.
+  int64_t Offset = 0;
+  for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
+    ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
+    if (OpC == 0)
+      return VariableIdxFound = true;
+    if (OpC->isZero()) continue;  // No offset.
+
+    // Handle struct indices, which add their field offset to the pointer.
+    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+      continue;
+    }
+
+    // Otherwise, we have a sequential type like an array or vector.  Multiply
+    // the index by the ElementSize.
+    uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+    Offset += Size*OpC->getSExtValue();
+  }
+
+  return Offset;
+}
+
+/// IsPointerAtOffset - Return true if Ptr1 is exactly provably equal to Ptr2
+/// plus the specified constant offset.  For example, Ptr1 might be &A[42], and
+/// Ptr2 might be &A[40] and Offset might be 8.
+static bool IsPointerAtOffset(Value *Ptr1, Value *Ptr2, uint64_t Offset,
+                              TargetData &TD) {
+  // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an
+  // identical base.  After that base, they may have some number of common (and
+  // potentially variable) indices.  After that, each may have a constant
+  // offset, which determines their offset from each other.  At this point, we
+  // handle no other case.
+  GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(Ptr1);
+  GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(Ptr2);
+  if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
+    return false;
+
+  // Skip any common indices and track the GEP types.
+  unsigned Idx = 1;
+  for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
+    if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
+      break;
+
+  bool VariableIdxFound = false;
+  int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
+  int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
+  if (VariableIdxFound) return false;
+
+  return Offset1 == Offset2+(int64_t)Offset;
+}
+
+
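To make the offset convention above concrete: with 4-byte elements, &A[42] is &A[40] plus 8 bytes, which is the Offset = 8 case mentioned in the comment. A small standalone check of that arithmetic (illustrative only; the pass itself recovers it from constant GEP indices and TargetData):

#include <cassert>
#include <cstddef>

int main() {
  int A[100] = {0};
  // The byte offset between two elements of the same array is the index delta
  // times the element size; this is what the trailing constant GEP indices
  // encode once the common leading indices have been skipped.
  ptrdiff_t Offset = reinterpret_cast<char*>(&A[42]) -
                     reinterpret_cast<char*>(&A[40]);
  assert(Offset == 2 * static_cast<ptrdiff_t>(sizeof(int)));  // 8 bytes here
  return 0;
}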
+/// processStore - When GVN is scanning forward over instructions, we look for
+/// some other patterns to fold away.  In particular, this looks for stores to
+/// neighboring locations of memory.  If it sees enough consecutive ones
+/// (currently 4) it attempts to merge them together into a memcpy/memset.
+bool GVN::processStore(StoreInst *SI, SmallVectorImpl<Instruction*> &toErase) {
+  if (!FormMemSet) return false;
+  if (SI->isVolatile()) return false;
+
+  // There are two cases that are interesting for this code to handle: memcpy
+  // and memset.  Right now we only handle memset.
+
+  // Ensure that the value being stored is something that can be memset'able a
+  // byte at a time like "0" or "-1" or any width, as well as things like
+  // 0xA0A0A0A0 and 0.0.
+  Value *ByteVal = isBytewiseValue(SI->getOperand(0));
+  if (!ByteVal)
+    return false;
+
+  TargetData &TD = getAnalysis<TargetData>();
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+  // Okay, so we now have a single store that can be splatable.  Try to 'grow'
+  // this store by looking for neighboring stores to the immediate left or right
+  // of the store we have so far.  While we could in theory handle stores in
+  // this order:  A[0], A[2], A[1]
+  // in practice, right now we only worry about cases where stores are
+  // consecutive in increasing or decreasing address order.
+  uint64_t BytesSoFar = TD.getTypeStoreSize(SI->getOperand(0)->getType());
+  uint64_t BytesFromSI = 0;
+  unsigned StartAlign = SI->getAlignment();
+  Value *StartPtr = SI->getPointerOperand();
+  SmallVector<StoreInst*, 16> Stores;
+  Stores.push_back(SI);
+
+  BasicBlock::iterator BI = SI;
+  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
+    if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
+      // If the call is readnone, ignore it, otherwise bail out.  We don't even
+      // allow readonly here because we don't want something like:
+      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
+      if (AA.getModRefBehavior(CallSite::get(BI)) ==
+            AliasAnalysis::DoesNotAccessMemory)
+        continue;
+
+      // TODO: If this is a memset, try to join it in.
+
+      break;
+    } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
+      break;
+
+    // If this is a non-store instruction it is fine, ignore it.
+    StoreInst *NextStore = dyn_cast<StoreInst>(BI);
+    if (NextStore == 0) continue;
+
+    // If this is a store, see if we can merge it in.
+    if (NextStore->isVolatile()) break;
+
+    // Check to see if this stored value is of the same byte-splattable value.
+    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+      break;
+
+    Value *ThisPointer = NextStore->getPointerOperand();
+    unsigned AccessSize = TD.getTypeStoreSize(SI->getOperand(0)->getType());
+
+    // If so, check to see if the store is before the current range or after it;
+    // in either case, extend the range, otherwise reject it.
+    if (IsPointerAtOffset(ThisPointer, StartPtr, BytesSoFar, TD)) {
+      // Okay, this extends the stored area on the end, just add to the bytes
+      // so far and remember this store.
+      BytesSoFar += AccessSize;
+      Stores.push_back(NextStore);
+      continue;
+    }
+
+    if (IsPointerAtOffset(StartPtr, ThisPointer, AccessSize, TD)) {
+      // Okay, the store is before the current range.  Reset our start pointer
+      // and get new alignment info etc.
+      BytesSoFar  += AccessSize;
+      BytesFromSI += AccessSize;
+      Stores.push_back(NextStore);
+      StartPtr = ThisPointer;
+      StartAlign = NextStore->getAlignment();
+      continue;
+    }
+
+    // Otherwise, this store wasn't contiguous with our current range, bail out.
+    break;
+  }
+
+  // If we found less than 4 stores to merge, bail out, it isn't worth losing
+  // type information in llvm IR to do the transformation.
+  if (Stores.size() < 4)
+    return false;
+
+  // Otherwise, we do want to transform this!  Create a new memset.  We put the
+  // memset right after the first store that we found in this block.  This
+  // ensures that the caller will increment the iterator to the memset before
+  // it deletes all the stores.
+  BasicBlock::iterator InsertPt = SI; ++InsertPt;
+
+  Function *F = Intrinsic::getDeclaration(SI->getParent()->getParent()
+                                          ->getParent(), Intrinsic::memset_i64);
+
+  // StartPtr may not dominate the starting point.  Instead of using it, base
+  // the destination pointer off the input to the first store in the block.
+  StartPtr = SI->getPointerOperand();
+
+  // Cast the start ptr to be i8* as memset requires.
+  const Type *i8Ptr = PointerType::getUnqual(Type::Int8Ty);
+  if (StartPtr->getType() != i8Ptr)
+    StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
+                               InsertPt);
+
+  // Offset the pointer if needed.
+  if (BytesFromSI)
+    StartPtr = new GetElementPtrInst(StartPtr, ConstantInt::get(Type::Int64Ty,
+                                                                -BytesFromSI),
+                                     "ptroffset", InsertPt);
+
+  Value *Ops[] = {
+    StartPtr, ByteVal,                                 // Start, value
+    ConstantInt::get(Type::Int64Ty, BytesSoFar),       // size
+    ConstantInt::get(Type::Int32Ty, StartAlign)        // align
+  };
+  new CallInst(F, Ops, Ops+4, "", InsertPt);
+
+  // Zap all the stores.
+  toErase.append(Stores.begin(), Stores.end());
+
+  ++NumMemSetInfer;
+  return true;
+}
+
+
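For orientation, the shape of code processStore targets (only when -form-memset-from-stores is enabled) is a run of at least four adjacent stores of the same splattable value; conceptually the first function below becomes the second. This is an illustrative source-level rendering, not output of the pass:

#include <cstring>

void initByStores(char *P) {
  // Four consecutive stores of the same byte value to adjacent locations...
  P[0] = 0;
  P[1] = 0;
  P[2] = 0;
  P[3] = 0;
}

void initByMemset(char *P) {
  // ...cover the same bytes as a single memset over the range, which is what
  // the inferred llvm.memset call expresses.
  std::memset(P, 0, 4);
}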
 /// performCallSlotOptzn - takes a memcpy and a call that it depends on,
 /// and checks for the possibility of a call slot optimization by having
 /// the call write its result directly into the destination of the memcpy.
-bool GVN::performCallSlotOptzn(MemCpyInst* cpy, CallInst* C,
-                               SmallVector<Instruction*, 4>& toErase) {
+bool GVN::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C,
+                               SmallVectorImpl<Instruction*> &toErase) {
   // The general transformation to keep in mind is
   //
   //   call @func(..., src, ...)
@@ -1066,7 +1320,6 @@ bool GVN::performCallSlotOptzn(MemCpyInst* cpy, CallInst* C,
   // guarantees that it holds only undefined values when passed in (so the final
   // memcpy can be dropped), that it is not read or written between the call and
   // the memcpy, and that writing beyond the end of it is undefined.
-
   SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
                                    srcAlloca->use_end());
   while (!srcUseList.empty()) {
@@ -1124,7 +1377,7 @@ bool GVN::performCallSlotOptzn(MemCpyInst* cpy, CallInst* C,
 /// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
 /// This allows later passes to remove the first memcpy altogether.
 bool GVN::processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
-                        SmallVector<Instruction*, 4>& toErase) {
+                        SmallVectorImpl<Instruction*> &toErase) {
   // We can only transform memcpys where the dest of one is the source of the
   // other
   if (M->getSource() != MDep->getDest())
@@ -1183,13 +1436,16 @@ bool GVN::processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
 
 /// processInstruction - When calculating availability, handle an instruction
 /// by inserting it into the appropriate sets
-bool GVN::processInstruction(Instruction* I,
-                             ValueNumberedSet& currAvail,
-                             DenseMap<Value*, LoadInst*>& lastSeenLoad,
-                             SmallVector<Instruction*, 4>& toErase) {
-  if (LoadInst* L = dyn_cast<LoadInst>(I)) {
+bool GVN::processInstruction(Instruction *I, ValueNumberedSet &currAvail,
+                             DenseMap<Value*, LoadInst*> &lastSeenLoad,
+                             SmallVectorImpl<Instruction*> &toErase) {
+  if (LoadInst* L = dyn_cast<LoadInst>(I))
     return processLoad(L, lastSeenLoad, toErase);
-  } else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
+
+  if (StoreInst *SI = dyn_cast<StoreInst>(I))
+    return processStore(SI, toErase);
+
+  if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
     MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
 
     // There are two possible optimizations we can do for memcpy:
@@ -1287,15 +1543,16 @@ bool GVN::iterateOnFunction(Function &F) {
   DominatorTree &DT = getAnalysis<DominatorTree>();
 
   SmallVector<Instruction*, 4> toErase;
-
+  DenseMap<Value*, LoadInst*> lastSeenLoad;
+
   // Top-down walk of the dominator tree
   for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()),
          E = df_end(DT.getRootNode()); DI != E; ++DI) {
 
     // Get the set to update for this block
     ValueNumberedSet& currAvail = availableOut[DI->getBlock()];
-    DenseMap<Value*, LoadInst*> lastSeenLoad;
-
+    lastSeenLoad.clear();
+
     BasicBlock* BB = DI->getBlock();
 
     // A block inherits AVAIL_OUT from its dominator
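Finally, the memcpy-to-memcpy forwarding that processMemCpy performs (a copy from X to Y followed by a copy from Y to Z can read from X directly, as its comment describes) looks roughly like the following at the source level. This sketch assumes the buffers do not overlap and that neither X nor Y is modified between the two copies, which is what the pass's dependence checks establish; it is an illustration, not the pass's output:

#include <cstring>

struct Buf { char data[64]; };

void beforeOpt(Buf &X, Buf &Y, Buf &Z) {
  std::memcpy(&Y, &X, sizeof(Buf));   // first copy:  X -> Y
  std::memcpy(&Z, &Y, sizeof(Buf));   // second copy reads Y, whose bytes came from X
}

void afterOpt(Buf &X, Buf &Y, Buf &Z) {
  std::memcpy(&Y, &X, sizeof(Buf));
  std::memcpy(&Z, &X, sizeof(Buf));   // forwarded to copy from X; if Y is otherwise
                                      // unused, the first memcpy can later be removed
}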