From 264b0361be31a06567513bbd7d9b7d974a4d064e Mon Sep 17 00:00:00 2001 From: Piotr Padlewski Date: Fri, 2 Oct 2015 22:12:22 +0000 Subject: [PATCH] invariant.group handling in GVN MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The most important part required to make clang devirtualization work ( ͡°͜ʖ ͡°). The code is able to find non-local dependencies, but unfortunately because the caller can only handle local dependencies, I had to add some restrictions to look for dependencies only in the same BB. http://reviews.llvm.org/D12992 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249196 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/Analysis/MemoryDependenceAnalysis.h | 27 +- lib/Analysis/MemoryDependenceAnalysis.cpp | 69 ++++ .../InstCombineLoadStoreAlloca.cpp | 11 +- lib/Transforms/Scalar/GVN.cpp | 17 +- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 8 +- lib/Transforms/Utils/Local.cpp | 15 +- lib/Transforms/Utils/SimplifyCFG.cpp | 9 +- lib/Transforms/Vectorize/BBVectorize.cpp | 9 +- test/Transforms/GVN/assume-equal.ll | 58 +++ test/Transforms/GVN/invariant.group.ll | 337 ++++++++++++++++++ 10 files changed, 521 insertions(+), 39 deletions(-) create mode 100644 test/Transforms/GVN/invariant.group.ll diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index 5793cf1a3b8..daa1ba91c07 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -397,12 +397,12 @@ namespace llvm { /// critical edges. void invalidateCachedPredecessors(); - /// getPointerDependencyFrom - Return the instruction on which a memory - /// location depends. If isLoad is true, this routine ignores may-aliases - /// with read-only operations. If isLoad is false, this routine ignores - /// may-aliases with reads from read-only locations. 
If possible, pass - /// the query instruction as well; this function may take advantage of - /// the metadata annotated to the query instruction to refine the result. + /// \brief Return the instruction on which a memory location depends. + /// If isLoad is true, this routine ignores may-aliases with read-only + /// operations. If isLoad is false, this routine ignores may-aliases + /// with reads from read-only locations. If possible, pass the query + /// instruction as well; this function may take advantage of the metadata + /// annotated to the query instruction to refine the result. /// /// Note that this is an uncached query, and thus may be inefficient. /// @@ -412,6 +412,21 @@ namespace llvm { BasicBlock *BB, Instruction *QueryInst = nullptr); + MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, + bool isLoad, + BasicBlock::iterator ScanIt, + BasicBlock *BB, + Instruction *QueryInst); + + /// This analysis looks for other loads and stores with invariant.group + /// metadata and the same pointer operand. Returns Unknown if it does not + /// find anything, and Def if it can be assumed that 2 instructions load or + /// store the same value. + /// FIXME: This analysis works only on single block because of restrictions + /// at the call site. + MemDepResult getInvariantGroupPointerDependency(LoadInst *LI, + BasicBlock *BB); + /// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that /// looks at a memory location for a load (specified by MemLocBase, Offs, /// and Size) and compares it against a load. 
If the specified load could diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 07fd9cfbfc1..ff4d55e9fd4 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -380,6 +380,75 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst) { + if (QueryInst != nullptr) { + if (auto *LI = dyn_cast(QueryInst)) { + MemDepResult invariantGroupDependency = + getInvariantGroupPointerDependency(LI, BB); + + if (invariantGroupDependency.isDef()) + return invariantGroupDependency; + } + } + return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst); +} + +MemDepResult +MemoryDependenceAnalysis::getInvariantGroupPointerDependency(LoadInst *LI, + BasicBlock *BB) { + Value *LoadOperand = LI->getPointerOperand(); + // It is not safe to walk the use list of a global value, because function + // passes aren't allowed to look outside their functions. + if (isa(LoadOperand)) + return MemDepResult::getUnknown(); + + auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group); + if (!InvariantGroupMD) + return MemDepResult::getUnknown(); + + MemDepResult Result = MemDepResult::getUnknown(); + llvm::SmallSet Seen; + // Queue to process all pointers that are equivalent to load operand. 
+ llvm::SmallVector LoadOperandsQueue; + LoadOperandsQueue.push_back(LoadOperand); + while (!LoadOperandsQueue.empty()) { + Value *Ptr = LoadOperandsQueue.pop_back_val(); + if (isa(Ptr)) + continue; + + if (auto *BCI = dyn_cast(Ptr)) { + if (!Seen.count(BCI->getOperand(0))) { + LoadOperandsQueue.push_back(BCI->getOperand(0)); + Seen.insert(BCI->getOperand(0)); + } + } + + for (Use &Us : Ptr->uses()) { + auto *U = dyn_cast(Us.getUser()); + if (!U || U == LI || !DT->dominates(U, LI)) + continue; + + if (auto *BCI = dyn_cast(U)) { + if (!Seen.count(BCI)) { + LoadOperandsQueue.push_back(BCI); + Seen.insert(BCI); + } + continue; + } + // If we hit load/store with the same invariant.group metadata (and the + // same pointer operand) we can assume that value pointed by pointer + // operand didn't change. + if ((isa(U) || isa(U)) && U->getParent() == BB && + U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD) + return MemDepResult::getDef(U); + } + } + return Result; +} + +MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom( + const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, + BasicBlock *BB, Instruction *QueryInst) { + const Value *MemLocBase = nullptr; int64_t MemLocOffset = 0; unsigned Limit = BlockScanLimit; diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 30478f4e7d0..3a955c8eff2 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -755,13 +755,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { DefMaxInstsToScan, AA, &AATags)) { if (LoadInst *NLI = dyn_cast(AvailableVal)) { unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, - LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, - LLVMContext::MD_range, - LLVMContext::MD_invariant_load, - LLVMContext::MD_nonnull, - }; + LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + 
LLVMContext::MD_noalias, LLVMContext::MD_range, + LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull, + LLVMContext::MD_invariant_group}; combineMetadata(NLI, &LI, KnownIDs); }; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 433d3038462..b7e2722bf38 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1669,6 +1669,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (Tags) NewLoad->setAAMetadata(Tags); + if (auto *InvGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group)) + NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD); + // Transfer DebugLoc. NewLoad->setDebugLoc(LI->getDebugLoc()); @@ -1846,13 +1849,10 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) { // regions, and so we need a conservative combination of the noalias // scopes. static const unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, - LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, - LLVMContext::MD_range, - LLVMContext::MD_fpmath, - LLVMContext::MD_invariant_load, - }; + LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, LLVMContext::MD_range, + LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, + LLVMContext::MD_invariant_group}; combineMetadata(ReplInst, I, KnownIDs); } } @@ -2106,6 +2106,8 @@ bool GVN::replaceOperandsWithConsts(Instruction *Instr) const { if (it != ReplaceWithConstMap.end()) { assert(!isa(Operand) && "Replacing constants with constants is invalid"); + DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " << *it->second + << " in instruction " << *Instr << '\n'); Instr->setOperand(OpNum, it->second); Changed = true; } @@ -2461,7 +2463,6 @@ bool GVN::runOnFunction(Function& F) { return Changed; } - bool GVN::processBlock(BasicBlock *BB) { // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function // (and incrementing BI before processing an instruction). 
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 58ee7dffcad..4039c6f3534 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -744,11 +744,9 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, // Update AA metadata // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be // handled here, but combineMetadata doesn't support them yet - unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, - LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, - }; + unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, + LLVMContext::MD_invariant_group}; combineMetadata(C, cpy, KnownIDs); // Remove the memcpy. diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 1a74544045c..5be5a7df26b 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1386,7 +1386,8 @@ bool llvm::removeUnreachableBlocks(Function &F) { return true; } -void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef KnownIDs) { +void llvm::combineMetadata(Instruction *K, const Instruction *J, + ArrayRef KnownIDs) { SmallVector, 4> Metadata; K->dropUnknownNonDebugMetadata(KnownIDs); K->getAllMetadataOtherThanDebugLoc(Metadata); @@ -1424,8 +1425,20 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRefsetMetadata(Kind, JMD); break; + case LLVMContext::MD_invariant_group: + // Preserve !invariant.group in K. + break; } } + // Set !invariant.group from J if J has it. If both instructions have it + // then we will just pick it from J - even when they are different. + // Also make sure that K is load or store - f.e. combining bitcast with load + // could produce bitcast with invariant.group metadata, which is invalid. 
+ // FIXME: we should try to preserve both invariant.group md if they are + // different, but right now instruction can only have one invariant.group. + if (auto *JMD = J->getMetadata(LLVMContext::MD_invariant_group)) + if (isa(K) || isa(K)) + K->setMetadata(LLVMContext::MD_invariant_group, JMD); } unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 403356931ca..e2bd46e85c3 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1095,12 +1095,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, I2->replaceAllUsesWith(I1); I1->intersectOptionalDataWith(I2); unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, - LLVMContext::MD_range, - LLVMContext::MD_fpmath, - LLVMContext::MD_invariant_load, - LLVMContext::MD_nonnull - }; + LLVMContext::MD_tbaa, LLVMContext::MD_range, + LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, + LLVMContext::MD_nonnull, LLVMContext::MD_invariant_group}; combineMetadata(I1, I2, KnownIDs); I2->eraseFromParent(); Changed = true; diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 3a36d68c388..deab903cd99 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -3120,12 +3120,9 @@ namespace { } else if (!isa(K)) K->mutateType(getVecTypeForPair(L->getType(), H->getType())); - unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, - LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, - LLVMContext::MD_fpmath - }; + unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, LLVMContext::MD_fpmath, + LLVMContext::MD_invariant_group}; combineMetadata(K, H, KnownIDs); K->intersectOptionalDataWith(H); diff --git a/test/Transforms/GVN/assume-equal.ll b/test/Transforms/GVN/assume-equal.ll index 2f9c3942ca9..f9304a8fc7c 100644 --- a/test/Transforms/GVN/assume-equal.ll 
+++ b/test/Transforms/GVN/assume-equal.ll @@ -43,6 +43,63 @@ if.end: ; preds = %if.else, %if.then ret void } +; Check integration with invariant.group handling +; CHECK-LABEL: define void @invariantGroupHandling(i1 zeroext %p) { +define void @invariantGroupHandling(i1 zeroext %p) { +entry: + %call = tail call noalias i8* @_Znwm(i64 8) #4 + %0 = bitcast i8* %call to %struct.A* + tail call void @_ZN1AC1Ev(%struct.A* %0) #1 + %1 = bitcast i8* %call to i8*** + %vtable = load i8**, i8*** %1, align 8, !invariant.group !0 + %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2) + tail call void @llvm.assume(i1 %cmp.vtables) + br i1 %p, label %if.then, label %if.else + +if.then: ; preds = %entry + %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)** + %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8 + +; CHECK: call i32 @_ZN1A3fooEv( + %call2 = tail call i32 %2(%struct.A* %0) #1 + %vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0 + %vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)** + %call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8 +; FIXME: those loads could be also direct, but right now the invariant.group +; analysis works only on single block +; CHECK-NOT: call i32 @_ZN1A3fooEv( + %callx = tail call i32 %call1(%struct.A* %0) #1 + + %vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0 + %vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)** + %call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8 +; CHECK-NOT: call i32 @_ZN1A3fooEv( + %cally = tail call i32 %call4(%struct.A* %0) #1 + + %b = bitcast i8* %call to %struct.A** + %vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0 + %vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)** + %vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8 +; CHECK-NOT: call i32 @_ZN1A3fooEv( + %unknown = tail call i32 
%vfun(%struct.A* %0) #1 + + br label %if.end + +if.else: ; preds = %entry + %vfn47 = getelementptr inbounds i8*, i8** %vtable, i64 1 + %vfn4 = bitcast i8** %vfn47 to i32 (%struct.A*)** + + ; CHECK: call i32 @_ZN1A3barEv( + %3 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vfn4, align 8 + + %call5 = tail call i32 %3(%struct.A* %0) #1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + + ; Checking const propagation in the same BB ; CHECK-LABEL: define i32 @main() @@ -175,3 +232,4 @@ declare void @llvm.assume(i1) declare i32 @_ZN1A3fooEv(%struct.A*) declare i32 @_ZN1A3barEv(%struct.A*) +!0 = !{!"struct A"} diff --git a/test/Transforms/GVN/invariant.group.ll b/test/Transforms/GVN/invariant.group.ll new file mode 100644 index 00000000000..f703fda93f2 --- /dev/null +++ b/test/Transforms/GVN/invariant.group.ll @@ -0,0 +1,337 @@ +; RUN: opt < %s -gvn -S | FileCheck %s + +%struct.A = type { i32 (...)** } +@_ZTV1A = available_externally unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)], align 8 +@_ZTI1A = external constant i8* + +@unknownPtr = external global i8 + +; CHECK-LABEL: define i8 @simple() { +define i8 @simple() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + + %a = load i8, i8* %ptr, !invariant.group !0 + %b = load i8, i8* %ptr, !invariant.group !0 + %c = load i8, i8* %ptr, !invariant.group !0 +; CHECK: ret i8 42 + ret i8 %a +} + +; CHECK-LABEL: define i8 @optimizable1() { +define i8 @optimizable1() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + %a = load i8, i8* %ptr, !invariant.group !0 + + call void @foo(i8* %ptr2); call to use %ptr2 +; CHECK: ret i8 42 + ret i8 %a +} + +; CHECK-LABEL: define i8 @optimizable2() { +define i8 @optimizable2() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + 
call void @foo(i8* %ptr) + + store i8 13, i8* %ptr ; can't use this store with invariant.group + %a = load i8, i8* %ptr + call void @bar(i8 %a) ; call to use %a + + call void @foo(i8* %ptr) + %b = load i8, i8* %ptr, !invariant.group !0 + +; CHECK: ret i8 42 + ret i8 %b +} + +; CHECK-LABEL: define i8 @unoptimizable1() { +define i8 @unoptimizable1() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) + %a = load i8, i8* %ptr, !invariant.group !0 +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define void @indirectLoads() { +define void @indirectLoads() { +entry: + %a = alloca %struct.A*, align 8 + %0 = bitcast %struct.A** %a to i8* + + %call = call i8* @getPointer(i8* null) + %1 = bitcast i8* %call to %struct.A* + call void @_ZN1AC1Ev(%struct.A* %1) + %2 = bitcast %struct.A* %1 to i8*** + +; CHECK: %vtable = load {{.*}} !invariant.group + %vtable = load i8**, i8*** %2, align 8, !invariant.group !2 + %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) + call void @llvm.assume(i1 %cmp.vtables) + + store %struct.A* %1, %struct.A** %a, align 8 + %3 = load %struct.A*, %struct.A** %a, align 8 + %4 = bitcast %struct.A* %3 to void (%struct.A*)*** + +; CHECK: call void @_ZN1A3fooEv( + %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !2 + %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0 + %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8 + call void %5(%struct.A* %3) + %6 = load %struct.A*, %struct.A** %a, align 8 + %7 = bitcast %struct.A* %6 to void (%struct.A*)*** + +; CHECK: call void @_ZN1A3fooEv( + %vtable2 = load void (%struct.A*)**, void (%struct.A*)*** %7, align 8, !invariant.group !2 + %vfn3 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable2, i64 0 + %8 = load void (%struct.A*)*, void (%struct.A*)** %vfn3, align 8 + + call void %8(%struct.A* %6) + %9 = load %struct.A*, 
%struct.A** %a, align 8 + %10 = bitcast %struct.A* %9 to void (%struct.A*)*** + + %vtable4 = load void (%struct.A*)**, void (%struct.A*)*** %10, align 8, !invariant.group !2 + %vfn5 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable4, i64 0 + %11 = load void (%struct.A*)*, void (%struct.A*)** %vfn5, align 8 +; CHECK: call void @_ZN1A3fooEv( + call void %11(%struct.A* %9) + + %vtable5 = load i8**, i8*** %2, align 8, !invariant.group !2 + %vfn6 = getelementptr inbounds i8*, i8** %vtable5, i64 0 + %12 = bitcast i8** %vfn6 to void (%struct.A*)** + %13 = load void (%struct.A*)*, void (%struct.A*)** %12, align 8 +; CHECK: call void @_ZN1A3fooEv( + call void %13(%struct.A* %9) + + ret void +} + +; CHECK-LABEL: define void @combiningBitCastWithLoad() { +define void @combiningBitCastWithLoad() { +entry: + %a = alloca %struct.A*, align 8 + %0 = bitcast %struct.A** %a to i8* + + %call = call i8* @getPointer(i8* null) + %1 = bitcast i8* %call to %struct.A* + call void @_ZN1AC1Ev(%struct.A* %1) + %2 = bitcast %struct.A* %1 to i8*** + +; CHECK: %vtable = load {{.*}} !invariant.group + %vtable = load i8**, i8*** %2, align 8, !invariant.group !2 + %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) + + store %struct.A* %1, %struct.A** %a, align 8 +; CHECK-NOT: !invariant.group + %3 = load %struct.A*, %struct.A** %a, align 8 + %4 = bitcast %struct.A* %3 to void (%struct.A*)*** + + %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !2 + %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0 + %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8 + call void %5(%struct.A* %3) + + ret void +} + +; CHECK-LABEL:define void @loadCombine() { +define void @loadCombine() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[A:.*]] = load i8, i8* %ptr, !invariant.group + %a = load i8, i8* %ptr, 
!invariant.group !0 +; CHECK-NOT: load + %b = load i8, i8* %ptr, !invariant.group !1 +; CHECK: call void @bar(i8 %[[A]]) + call void @bar(i8 %a) +; CHECK: call void @bar(i8 %[[A]]) + call void @bar(i8 %b) + ret void +} + +; CHECK-LABEL: define void @loadCombine1() { +define void @loadCombine1() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[D:.*]] = load i8, i8* %ptr, !invariant.group + %c = load i8, i8* %ptr +; CHECK-NOT: load + %d = load i8, i8* %ptr, !invariant.group !1 +; CHECK: call void @bar(i8 %[[D]]) + call void @bar(i8 %c) +; CHECK: call void @bar(i8 %[[D]]) + call void @bar(i8 %d) + ret void +} + +; CHECK-LABEL: define void @loadCombine2() { +define void @loadCombine2() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group + %e = load i8, i8* %ptr, !invariant.group !1 +; CHECK-NOT: load + %f = load i8, i8* %ptr +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %e) +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %f) + ret void +} + +; CHECK-LABEL: define void @loadCombine3() { +define void @loadCombine3() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group ![[OneMD:[0-9]]] + %e = load i8, i8* %ptr, !invariant.group !1 +; CHECK-NOT: load + %f = load i8, i8* %ptr, !invariant.group !1 +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %e) +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %f) + ret void +} + +; CHECK-LABEL: define i8 @unoptimizable2() { +define i8 @unoptimizable2() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) + %a = load i8, i8* %ptr + call void @foo(i8* %ptr) + %b = load i8, i8* %ptr, !invariant.group !0 + +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define i8 @unoptimizable3() { +define i8 @unoptimizable3() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, 
!invariant.group !0 + %ptr2 = call i8* @getPointer(i8* %ptr) + %a = load i8, i8* %ptr2, !invariant.group !0 + +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define i8 @unoptimizable4() { +define i8 @unoptimizable4() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + %a = load i8, i8* %ptr2, !invariant.group !0 + +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define i8 @volatile1() { +define i8 @volatile1() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + %a = load i8, i8* %ptr, !invariant.group !0 + %b = load volatile i8, i8* %ptr +; CHECK: call void @bar(i8 %b) + call void @bar(i8 %b) + + %c = load volatile i8, i8* %ptr, !invariant.group !0 +; FIXME: we could change %c to 42, preserving volatile load +; CHECK: call void @bar(i8 %c) + call void @bar(i8 %c) +; CHECK: ret i8 42 + ret i8 %a +} + +; CHECK-LABEL: define i8 @volatile2() { +define i8 @volatile2() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + %a = load i8, i8* %ptr, !invariant.group !0 + %b = load volatile i8, i8* %ptr +; CHECK: call void @bar(i8 %b) + call void @bar(i8 %b) + + %c = load volatile i8, i8* %ptr, !invariant.group !0 +; FIXME: we could change %c to 42, preserving volatile load +; CHECK: call void @bar(i8 %c) + call void @bar(i8 %c) +; CHECK: ret i8 42 + ret i8 %a +} + +; CHECK-LABEL: define i8 @fun() { +define i8 @fun() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + + %a = load i8, i8* %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change +; CHECK: call void @bar(i8 42) + call void @bar(i8 %a) + + call void @foo(i8* %ptr) + %b = load i8, i8* %ptr, !invariant.group !1 ; Can't assume anything, because group changed +; CHECK: call void @bar(i8 %b) + call void @bar(i8 %b) + + %newPtr = call i8* @getPointer(i8* %ptr) + 
%c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr +; CHECK: call void @bar(i8 %c) + call void @bar(i8 %c) + + %unknownValue = load i8, i8* @unknownPtr +; FIXME: Can assume that %unknownValue == 42 +; CHECK: store i8 %unknownValue, i8* %ptr, !invariant.group !0 + store i8 %unknownValue, i8* %ptr, !invariant.group !0 + + %newPtr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + %d = load i8, i8* %newPtr2, !invariant.group !0 ; Can't step through invariant.group.barrier to get value of %ptr +; CHECK: ret i8 %d + ret i8 %d +} + +declare void @foo(i8*) +declare void @bar(i8) +declare i8* @getPointer(i8*) +declare void @_ZN1A3fooEv(%struct.A*) +declare void @_ZN1AC1Ev(%struct.A*) +declare i8* @llvm.invariant.group.barrier(i8*) + +; Function Attrs: nounwind +declare void @llvm.assume(i1 %cmp.vtables) #0 + + +attributes #0 = { nounwind } +; CHECK: ![[OneMD]] = !{!"other ptr"} +!0 = !{!"magic ptr"} +!1 = !{!"other ptr"} +!2 = !{!"vtable_of_a"} -- 2.34.1