Add comment.

[oota-llvm.git] / lib / Transforms / Scalar / GVN.cpp
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp

index 4de0f3dc7254109ea654a5671e07ee7d51889517..8bb811c45b61976707373e921e733d5789e51b98 100644 (file)
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -34,6 +34,7 @@
  #include "llvm/Analysis/MemoryDependenceAnalysis.h"
  #include "llvm/Support/CFG.h"
  #include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetData.h"
  using namespace llvm;
  
  //===----------------------------------------------------------------------===//
@@ -173,17 +174,17 @@ template <> struct DenseMapInfo<Expression> {
      hash = e.secondVN + hash * 37;
      hash = e.thirdVN + hash * 37;
      
-    hash = (unsigned)((uintptr_t)e.type >> 4) ^
-            (unsigned)((uintptr_t)e.type >> 9) +
-            hash * 37;
+    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
+            (unsigned)((uintptr_t)e.type >> 9)) +
+           hash * 37;
      
      for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
           E = e.varargs.end(); I != E; ++I)
        hash = *I + hash * 37;
      
-    hash = (unsigned)((uintptr_t)e.function >> 4) ^
-            (unsigned)((uintptr_t)e.function >> 9) +
-            hash * 37;
+    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
+            (unsigned)((uintptr_t)e.function >> 9)) +
+           hash * 37;
      
      return hash;
    }
@@ -721,8 +722,10 @@ namespace {
        AU.addRequired<DominatorTree>();
        AU.addRequired<MemoryDependenceAnalysis>();
        AU.addRequired<AliasAnalysis>();
+      AU.addRequired<TargetData>();
        AU.addPreserved<AliasAnalysis>();
        AU.addPreserved<MemoryDependenceAnalysis>();
+      AU.addPreserved<TargetData>();
      }
    
      // Helper fuctions
@@ -749,6 +752,8 @@ namespace {
      bool iterateOnFunction(Function &F);
      Value* CollapsePhi(PHINode* p);
      bool isSafeReplacement(PHINode* p, Instruction* inst);
+    bool valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+                                 Instruction* cutoff);
    };
    
    char GVN::ID = 0;
@@ -1052,52 +1057,114 @@ bool GVN::processLoad(LoadInst* L,
    return deletedLoad;
  }
  
+/// valueHasOnlyOneUse - Returns true if a value has only one use after the
+/// cutoff that is in the current same block and is the same as the use
+/// parameter.
+bool GVN::valueHasOnlyOneUseAfter(Value* val, MemCpyInst* use,
+                                  Instruction* cutoff) {
+  DominatorTree& DT = getAnalysis<DominatorTree>();
+  
+  SmallVector<User*, 8> useList(val->use_begin(), val->use_end());
+  while (!useList.empty()) {
+    User* UI = useList.back();
+    
+    
+    if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
+      useList.pop_back();
+      for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
+           I != E; ++I)
+        useList.push_back(*I);
+    } else if (UI == use) {
+      useList.pop_back();
+    } else if (Instruction* inst = dyn_cast<Instruction>(UI)) {
+      if (inst->getParent() == use->getParent() &&
+          (inst == cutoff || !DT.dominates(cutoff, inst))) {
+        useList.pop_back();
+      } else
+        return false;
+    } else
+      return false;
+  }
+  
+  return true;
+}
+
  /// performReturnSlotOptzn - takes a memcpy and a call that it depends on,
  /// and checks for the possibility of a return slot optimization by having
  /// the call write its result directly into the callees return parameter
  /// rather than using memcpy
  bool GVN::performReturnSlotOptzn(MemCpyInst* cpy, CallInst* C,
                                   SmallVector<Instruction*, 4>& toErase) {
-  // Check that we're copying to an argument...
+  // Deliberately get the source and destination with bitcasts stripped away,
+  // because we'll need to do type comparisons based on the underlying type.
    Value* cpyDest = cpy->getDest();
-  if (!isa<Argument>(cpyDest))
-    return false;
+  Value* cpySrc = cpy->getSource();
+  CallSite CS = CallSite::get(C);
    
-  // And that the argument is the return slot
-  Argument* sretArg = cast<Argument>(cpyDest);
-  if (!sretArg->hasStructRetAttr())
+  // Since this is a return slot optimization, we need to make sure that
+  // the value being copied is, in fact, in a return slot.  We also need to
+  // check that the return slot parameter is marked noalias, so that we can
+  // be sure that changing it will not cause unexpected behavior changes due
+  // to it being accessed through a global or another parameter.
+  if (CS.arg_size() == 0 ||
+      cpySrc != CS.getArgument(0) ||
+      !CS.paramHasAttr(1, ParamAttr::NoAlias | ParamAttr::StructRet))
      return false;
    
-  // Make sure the return slot is otherwise dead
-  std::set<User*> useList(sretArg->use_begin(), sretArg->use_end());
-  while (!useList.empty()) {
-    User* UI = *useList.begin();
-    
-    if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
-      useList.insert(UI->use_begin(), UI->use_end());
-      useList.erase(UI);
-    } else if (UI == cpy)
-      useList.erase(UI);
-    else
+  // Since we're changing the parameter to the callsite, we need to make sure
+  // that what would be the new parameter dominates the callsite.
+  DominatorTree& DT = getAnalysis<DominatorTree>();
+  if (Instruction* cpyDestInst = dyn_cast<Instruction>(cpyDest))
+    if (!DT.dominates(cpyDestInst, C))
        return false;
-  }
    
-  // Make sure the call cannot modify the return slot in some unpredicted way
-  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
-  if (AA.getModRefInfo(C, cpy->getRawDest(), ~0UL) != AliasAnalysis::NoModRef)
+  // Check that something sneaky is not happening involving casting
+  // return slot types around.
+  if (CS.getArgument(0)->getType() != cpyDest->getType())
      return false;
+  // sret --> pointer
+  const PointerType* PT = cast<PointerType>(cpyDest->getType()); 
    
-  // If all checks passed, then we can perform the transformation
-  CallSite CS = CallSite::get(C);
-  if (CS.getArgument(0)->getType() != cpyDest->getType())
+  // We can only perform the transformation if the size of the memcpy
+  // is constant and equal to the size of the structure.
+  ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
+  if (!cpyLength)
      return false;
-      
+  
+  TargetData& TD = getAnalysis<TargetData>();
+  if (TD.getTypeStoreSize(PT->getElementType()) != cpyLength->getZExtValue())
+    return false;
+  
+  // For safety, we must ensure that the output parameter of the call only has
+  // a single use, the memcpy.  Otherwise this can introduce an invalid
+  // transformation.
+  if (!valueHasOnlyOneUseAfter(CS.getArgument(0), cpy, C))
+    return false;
+  
+  // We only perform the transformation if it will be profitable. 
+  if (!valueHasOnlyOneUseAfter(cpyDest, cpy, C))
+    return false;
+  
+  // In addition to knowing that the call does not access the return slot
+  // in some unexpected manner, which we derive from the noalias attribute,
+  // we also need to know that it does not sneakily modify the destination
+  // slot in the caller.  We don't have parameter attributes to go by
+  // for this one, so we just rely on AA to figure it out for us.
+  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+  if (AA.getModRefInfo(C, cpy->getRawDest(), cpyLength->getZExtValue()) !=
+      AliasAnalysis::NoModRef)
+    return false;
+  
+  // If all the checks have passed, then we're alright to do the transformation.
    CS.setArgument(0, cpyDest);
    
+  // Drop any cached information about the call, because we may have changed
+  // its dependence information by changing its parameter.
    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
    MD.dropInstruction(C);
    
    // Remove the memcpy
+  MD.removeInstruction(cpy);
    toErase.push_back(cpy);
    
    return true;
@@ -1121,8 +1188,8 @@ bool GVN::processMemCpy(MemCpyInst* M, MemCpyInst* MDep,
    if (!C1 || !C2)
      return false;
    
-  uint64_t CpySize = C1->getValue().getZExtValue();
-  uint64_t DepSize = C2->getValue().getZExtValue();
+  uint64_t DepSize = C1->getValue().getZExtValue();
+  uint64_t CpySize = C2->getValue().getZExtValue();
    
    if (DepSize < CpySize)
      return false;
@@ -1183,13 +1250,11 @@ bool GVN::processInstruction(Instruction* I,
      if (dep == MemoryDependenceAnalysis::None ||
          dep == MemoryDependenceAnalysis::NonLocal)
        return false;
-    else if (CallInst* C = dyn_cast<CallInst>(dep)) {
-      if (!isa<MemCpyInst>(C))
-        return performReturnSlotOptzn(M, C, toErase);
-    } else if (!isa<MemCpyInst>(dep))
-      return false;
-    
-    return processMemCpy(M, cast<MemCpyInst>(dep), toErase);
+    if (MemCpyInst *MemCpy = dyn_cast<MemCpyInst>(dep))
+      return processMemCpy(M, MemCpy, toErase);
+    if (CallInst* C = dyn_cast<CallInst>(dep))
+      return performReturnSlotOptzn(M, C, toErase);
+    return false;
    }
    
    unsigned num = VN.lookup_or_add(I);