//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.setPreservesCFG();
}
bool doInitialization(Module &M) override {
/// Verify F is free of dead code.
void verifyNoDeadCode(Function &F);
+ /// Check if V has more than one use within loop L.
+ bool hasMoreThanOneUseInLoop(Value *V, Loop *L);
+ /// Swap the index operands of the two GEPs.
+ void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second);
+ /// Check if it is safe to swap the index operands of the two GEPs.
+ bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second,
+ Loop *CurLoop);
+
const DataLayout *DL;
DominatorTree *DT;
ScalarEvolution *SE;
const TargetMachine *TM;
+
+ LoopInfo *LI;
+ TargetLibraryInfo *TLI;
/// Whether to lower a GEP with multiple indices into arithmetic operations or
/// multiple GEPs with a single index.
bool LowerGEP;
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
"Split GEPs to a variadic base and a constant offset for better CSE", false,
Type *I8PtrTy =
Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
Value *ResultPtr = Variadic->getOperand(0);
+ Loop *L = LI->getLoopFor(Variadic->getParent());
+ // The base is a candidate for swapping if it is loop invariant and has at
+ // most one use inside the loop.
+ bool isSwapCandidate =
+ L && L->isLoopInvariant(ResultPtr) &&
+ !hasMoreThanOneUseInLoop(ResultPtr, L);
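+ // Remember the first GEP we create so that, if a constant-offset GEP is
+ // emitted at the end, the two index operands can be swapped below.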
+ Value *FirstResult = nullptr;
+
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
// Create an ugly GEP with a single index for each index.
ResultPtr =
Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");
+ if (FirstResult == nullptr)
+ FirstResult = ResultPtr;
}
}
Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
ResultPtr =
Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
- }
+ } else
+ isSwapCandidate = false;
+
+ // If we created a GEP with constant index, and the base is loop invariant,
+ // then we swap the first one with it, so LICM can move constant GEP out
+ // later.
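+ // Illustrative sketch (hypothetical IR; names and offsets invented):
+ //   %first  = getelementptr i8, i8* %invariant_base, i64 %variant_idx
+ //   %second = getelementptr i8, i8* %first, i64 1032
+ // becomes, after swapping the index operands,
+ //   %first  = getelementptr i8, i8* %invariant_base, i64 1032
+ //   %second = getelementptr i8, i8* %first, i64 %variant_idx
+ // so %first is loop invariant and LICM can hoist it.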
+ GetElementPtrInst *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(FirstResult);
+ GetElementPtrInst *SecondGEP = dyn_cast<GetElementPtrInst>(ResultPtr);
+ if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
+ swapGEPOperand(FirstGEP, SecondGEP);
+
if (ResultPtr->getType() != Variadic->getType())
ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
-
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = false;
for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
- for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) {
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) {
+ for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE;)
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++))
Changed |= splitGEP(GEP);
- }
- // No need to split GEP ConstantExprs because all its indices are constant
- // already.
- }
+ // No need to split GEP ConstantExprs because all their indices are
+ // constant already.
}
Changed |= reuniteExts(F);
}
}
}
+
+bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
+ GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
+ if (!FirstGEP || !FirstGEP->hasOneUse())
+ return false;
+
+ if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
+ return false;
+
+ if (FirstGEP == SecondGEP)
+ return false;
+
+ unsigned FirstNum = FirstGEP->getNumOperands();
+ unsigned SecondNum = SecondGEP->getNumOperands();
+ // Give up unless both GEPs have exactly two operands, i.e. a base pointer
+ // and a single index.
+ if (FirstNum != SecondNum || FirstNum != 2)
+ return false;
+
+ Value *FirstBase = FirstGEP->getOperand(0);
+ Value *SecondBase = SecondGEP->getOperand(0);
+ Value *FirstOffset = FirstGEP->getOperand(1);
+ // Give up if the index of the first GEP is loop invariant: in that case the
+ // first GEP is already hoistable as-is, so swapping gains nothing.
+ if (CurLoop->isLoopInvariant(FirstOffset))
+ return false;
+
+ // Give up if the bases don't have the same type.
+ if (FirstBase->getType() != SecondBase->getType())
+ return false;
+
+ Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);
+
+ // Check if the second operand of the first GEP has a constant coefficient.
+ // For example, in the following code, we won't gain anything by hoisting
+ // the second GEP out because the second GEP can be folded away:
+ // %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
+ // %67 = shl i64 %scevgep.sum.ur159, 2
+ // %uglygep160 = getelementptr i8* %65, i64 %67
+ // %uglygep161 = getelementptr i8* %uglygep160, i64 -1024
+
+ // Skip a constant shift instruction, which may have been generated by
+ // splitting GEPs.
+ if (FirstOffsetDef && FirstOffsetDef->isShift() &&
+ isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
+ FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));
+
+ // Give up if FirstOffsetDef is an Add or Sub with a constant operand,
+ // since the constant would likely be folded away and swapping would not
+ // pay off.
+ if (FirstOffsetDef)
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FirstOffsetDef)) {
+ unsigned Opc = BO->getOpcode();
+ if ((Opc == Instruction::Add || Opc == Instruction::Sub) &&
+ (isa<ConstantInt>(BO->getOperand(0)) ||
+ isa<ConstantInt>(BO->getOperand(1))))
+ return false;
+ }
+ return true;
+}
+
+bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) {
+ int UsesInLoop = 0;
+ for (User *U : V->users()) {
+ if (Instruction *UI = dyn_cast<Instruction>(U))
+ if (L->contains(UI))
+ if (++UsesInLoop > 1)
+ return true;
+ }
+ return false;
+}
+
+void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
+ GetElementPtrInst *Second) {
+ Value *Offset1 = First->getOperand(1);
+ Value *Offset2 = Second->getOperand(1);
+ First->setOperand(1, Offset2);
+ Second->setOperand(1, Offset1);
+
+ // We changed p+o+c to p+c+o, and the intermediate p+c may no longer be
+ // inbounds.
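+ // For example (hypothetical values): with o = -1024 and c = 1032, the
+ // final address p+c+o = p+8 can be within the object while the
+ // intermediate p+c = p+1032 runs past the end of a small object.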
+ const DataLayout &DAL = First->getModule()->getDataLayout();
+ APInt Offset(DAL.getPointerSizeInBits(
+ cast<PointerType>(First->getType())->getAddressSpace()),
+ 0);
+ Value *NewBase =
+ First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset);
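+ // If the accumulated constant offset cannot be proven to stay within the
+ // underlying object, conservatively clear inbounds on both GEPs.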
+ uint64_t ObjectSize;
+ if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) ||
+ Offset.ugt(ObjectSize)) {
+ First->setIsInBounds(false);
+ Second->setIsInBounds(false);
+ } else
+ First->setIsInBounds(true);
+}
--- /dev/null
+; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck <%t %s
+; REQUIRES: asserts
+target triple = "aarch64--linux-android"
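+
+; Check that when splitting a GEP in a loop yields a variadic "uglygep" plus
+; a constant-offset GEP, and the base is loop invariant, the pass swaps the
+; two index operands so the constant-offset GEP becomes loop invariant and
+; can be hoisted (here it ends up in the entry block).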
+
+%typeD = type { i32, i32, [256 x i32], [257 x i32] }
+
+define i32 @test1(%typeD* nocapture %s) {
+entry:
+; CHECK-LABEL: entry:
+; CHECK: %uglygep = getelementptr i8, i8* %0, i64 1032
+; CHECK: br label %do.body.i
+
+ %tPos = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 0
+ %k0 = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 1
+ %.pre = load i32, i32* %tPos, align 4
+ br label %do.body.i
+
+do.body.i:
+; CHECK-LABEL: do.body.i:
+; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3
+; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32*
+; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032
+
+ %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ]
+ %1 = phi i32 [ 0, %entry ], [ %.be6, %do.body.i.backedge ]
+ %add.i = add nsw i32 %1, %0
+ %shr.i = ashr i32 %add.i, 1
+ %idxprom.i = sext i32 %shr.i to i64
+ %arrayidx.i = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 3, i64 %idxprom.i
+ %2 = load i32, i32* %arrayidx.i, align 4
+ %cmp.i = icmp sle i32 %2, %.pre
+ %na.1.i = select i1 %cmp.i, i32 %0, i32 %shr.i
+ %nb.1.i = select i1 %cmp.i, i32 %shr.i, i32 %1
+ %sub.i = sub nsw i32 %na.1.i, %nb.1.i
+ %cmp1.i = icmp eq i32 %sub.i, 1
+ br i1 %cmp1.i, label %fooo.exit, label %do.body.i.backedge
+
+do.body.i.backedge:
+ %.be = phi i32 [ %na.1.i, %do.body.i ], [ 256, %fooo.exit ]
+ %.be6 = phi i32 [ %nb.1.i, %do.body.i ], [ 0, %fooo.exit ]
+ br label %do.body.i
+
+fooo.exit: ; preds = %do.body.i
+ store i32 %nb.1.i, i32* %k0, align 4
+ br label %do.body.i.backedge
+}
+