[SeparateConstOffsetFromGEP] sext(a)+sext(b) => sext(a+b) when a+b can't sign-overflow.

author Jingyue Wu <jingyue@google.com>

Fri, 14 Aug 2015 02:02:05 +0000 (02:02 +0000)

committer Jingyue Wu <jingyue@google.com>

Fri, 14 Aug 2015 02:02:05 +0000 (02:02 +0000)
author Jingyue Wu <jingyue@google.com>
Fri, 14 Aug 2015 02:02:05 +0000 (02:02 +0000)
committer Jingyue Wu <jingyue@google.com>
Fri, 14 Aug 2015 02:02:05 +0000 (02:02 +0000)
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp

index ad86c8c282880f7ebb31b1496e914009a0b6486e..97950caa3deafa1ade92ad93e3e2c5c1f793f6bb 100644 (file)
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -156,6 +156,7 @@
  //
  //===----------------------------------------------------------------------===//
  
+#include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/Constants.h"
@@ -164,6 +165,7 @@
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
  #include "llvm/IR/Operator.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/raw_ostream.h"
@@ -174,6 +176,7 @@
  #include "llvm/IR/IRBuilder.h"
  
  using namespace llvm;
+using namespace llvm::PatternMatch;
  
  static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
      "disable-separate-const-offset-from-gep", cl::init(false),
@@ -319,6 +322,7 @@ public:
  
    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<ScalarEvolution>();
      AU.addRequired<TargetTransformInfoWrapperPass>();
      AU.setPreservesCFG();
    }
@@ -373,15 +377,32 @@ private:
    ///
    /// Verified in @i32_add in split-gep.ll
    bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
+  /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
+  /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
+  /// the constant offset. After extraction, it becomes desirable to reunion the
+  /// distributed sexts. For example,
+  ///
+  ///                              &a[sext(i +nsw (j +nsw 5)]
+  ///   => distribute              &a[sext(i) +nsw (sext(j) +nsw 5)]
+  ///   => constant extraction     &a[sext(i) + sext(j)] + 5
+  ///   => reunion                 &a[sext(i +nsw j)] + 5
+  bool reuniteExts(Function &F);
+  /// A helper that reunites sexts in an instruction.
+  bool reuniteExts(Instruction *I);
+  /// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
+  Instruction *findClosestMatchingDominator(const SCEV *Key,
+                                            Instruction *Dominatee);
    /// Verify F is free of dead code.
    void verifyNoDeadCode(Function &F);
  
    const DataLayout *DL;
-  const DominatorTree *DT;
+  DominatorTree *DT;
+  ScalarEvolution *SE;
    const TargetMachine *TM;
    /// Whether to lower a GEP with multiple indices into arithmetic operations or
    /// multiple GEPs with a single index.
    bool LowerGEP;
+  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingExprs;
  };
  }  // anonymous namespace
  
@@ -391,6 +412,7 @@ INITIALIZE_PASS_BEGIN(
      "Split GEPs to a variadic base and a constant offset for better CSE", false,
      false)
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
  INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
  INITIALIZE_PASS_END(
      SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
@@ -891,13 +913,13 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
    // Clear the inbounds attribute because the new index may be off-bound.
    // e.g.,
    //
-  // b     = add i64 a, 5
-  // addr  = gep inbounds float, float* p, i64 b
+  //   b     = add i64 a, 5
+  //   addr  = gep inbounds float, float* p, i64 b
    //
    // is transformed to:
    //
-  // addr2 = gep float, float* p, i64 a ; inbounds removed
-  // addr  = gep inbounds float, float* addr2, i64 5
+  //   addr2 = gep float, float* p, i64 a ; inbounds removed
+  //   addr  = gep inbounds float, float* addr2, i64 5
    //
    // If a is -4, although the old index b is in bounds, the new index a is
    // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
@@ -1013,6 +1035,7 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
      return false;
  
    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  SE = &getAnalysis<ScalarEvolution>();
  
    bool Changed = false;
    for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
@@ -1025,12 +1048,84 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
      }
    }
  
+  Changed |= reuniteExts(F);
+
    if (VerifyNoDeadCode)
      verifyNoDeadCode(F);
  
    return Changed;
  }
  
+Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
+    const SCEV *Key, Instruction *Dominatee) {
+  auto Pos = DominatingExprs.find(Key);
+  if (Pos == DominatingExprs.end())
+    return nullptr;
+
+  auto &Candidates = Pos->second;
+  // Because we process the basic blocks in pre-order of the dominator tree, a
+  // candidate that doesn't dominate the current instruction won't dominate any
+  // future instruction either. Therefore, we pop it out of the stack. This
+  // optimization makes the algorithm O(n).
+  while (!Candidates.empty()) {
+    Instruction *Candidate = Candidates.back();
+    if (DT->dominates(Candidate, Dominatee))
+      return Candidate;
+    Candidates.pop_back();
+  }
+  return nullptr;
+}
+
+bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
+  if (!SE->isSCEVable(I->getType()))
+    return false;
+
+  //   Dom: LHS+RHS
+  //   I: sext(LHS)+sext(RHS)
+  // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
+  // TODO: handle zext
+  Value *LHS = nullptr, *RHS = nullptr;
+  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) ||
+      match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
+    if (LHS->getType() == RHS->getType()) {
+      const SCEV *Key =
+          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+      if (auto *Dom = findClosestMatchingDominator(Key, I)) {
+        Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
+        NewSExt->takeName(I);
+        I->replaceAllUsesWith(NewSExt);
+        RecursivelyDeleteTriviallyDeadInstructions(I);
+        return true;
+      }
+    }
+  }
+
+  // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
+  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
+      match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
+    if (isKnownNotFullPoison(I)) {
+      const SCEV *Key =
+          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+      DominatingExprs[Key].push_back(I);
+    }
+  }
+  return false;
+}
+
+bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
+  bool Changed = false;
+  DominatingExprs.clear();
+  for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
+       Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
+    BasicBlock *BB = Node->getBlock();
+    for (auto I = BB->begin(); I != BB->end(); ) {
+      Instruction *Cur = I++;
+      Changed |= reuniteExts(Cur);
+    }
+  }
+  return Changed;
+}
+
  void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
    for (auto &B : F) {
      for (auto &I : B) {
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll

index a0410024f6e7adc3d8bf01ce5d461bdd7d337b8f..e7b3545839c37fb1f46b1b851eea6e92e868c43f 100644 (file)
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -194,3 +194,41 @@ define void @sum_of_array4(i32 %x, i32 %y, float* nocapture %output) {
  ; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
  ; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
  ; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
+
+
+; The source code is:
+;   p0 = &input[sext(x + y)];
+;   p1 = &input[sext(x + (y + 5))];
+;
+; Without reuniting extensions, SeparateConstOffsetFromGEP would emit
+;   p0 = &input[sext(x + y)];
+;   t1 = &input[sext(x) + sext(y)];
+;   p1 = &t1[5];
+;
+; With reuniting extensions, it merges p0 and t1 and thus emits
+;   p0 = &input[sext(x + y)];
+;   p1 = &p0[5];
+define void @reunion(i32 %x, i32 %y, float* %input) {
+; IR-LABEL: @reunion(
+; PTX-LABEL: reunion(
+entry:
+  %xy = add nsw i32 %x, %y
+  %0 = sext i32 %xy to i64
+  %p0 = getelementptr inbounds float, float* %input, i64 %0
+  %v0 = load float, float* %p0, align 4
+; PTX: ld.f32 %f{{[0-9]+}}, {{\[}}[[p0:%rd[0-9]+]]{{\]}}
+  call void @use(float %v0)
+
+  %y5 = add nsw i32 %y, 5
+  %xy5 = add nsw i32 %x, %y5
+  %1 = sext i32 %xy5 to i64
+  %p1 = getelementptr inbounds float, float* %input, i64 %1
+; IR: getelementptr inbounds float, float* %p0, i64 5
+  %v1 = load float, float* %p1, align 4
+; PTX: ld.f32 %f{{[0-9]+}}, {{\[}}[[p0]]+20{{\]}}
+  call void @use(float %v1)
+
+  ret void
+}
+
+declare void @use(float)
author	Jingyue Wu <jingyue@google.com>
	Fri, 14 Aug 2015 02:02:05 +0000 (02:02 +0000)
committer	Jingyue Wu <jingyue@google.com>
	Fri, 14 Aug 2015 02:02:05 +0000 (02:02 +0000)
lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp		patch \| blob \| history
test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll		patch \| blob \| history