From fc091f40c912e078cee0cff708b8a2c824444bf1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 24 Sep 2015 07:22:38 +0000 Subject: [PATCH] Use new TokenFactor chain when merging stores If the stores are storing values from loads which partially alias the stores, we could end up placing the merged loads and stores on the same chain which has the potential to break. Each store may have a different chain dependency on only some of the original loads. Create a new TokenFactor to capture all of the required dependencies of the stores rather than assuming all stores can use the same chain. The testcase is a situation where this happens, although it does not have an observable change from this. The DAG nodes just happened to not be reordered before despite this missing chain dependency. This is based on an off-list report for an out of tree target which regressed due to r246307 and I haven't managed to find a case where the nodes do end up reordered with an in tree target. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248468 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 34 ++++++++++-- .../X86/merge-store-partially-alias-loads.ll | 53 +++++++++++++++++++ 2 files changed, 82 insertions(+), 5 deletions(-) create mode 100644 test/CodeGen/X86/merge-store-partially-alias-loads.ll diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c9914fa0f17..ad60b8244f1 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -407,6 +407,7 @@ namespace { SDValue getMergedConstantVectorStore(SelectionDAG &DAG, SDLoc SL, ArrayRef Stores, + SmallVectorImpl &Chains, EVT Ty) const; /// This is a helper function for MergeConsecutiveStores. When the source @@ -10817,11 +10818,15 @@ struct BaseIndexOffset { SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, SDLoc SL, ArrayRef Stores, + SmallVectorImpl &Chains, EVT Ty) const { SmallVector BuildVector; - for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) - BuildVector.push_back(cast(Stores[I].MemNode)->getValue()); + for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) { + StoreSDNode *St = cast(Stores[I].MemNode); + Chains.push_back(St->getChain()); + BuildVector.push_back(St->getValue()); + } return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector); } @@ -10846,6 +10851,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( LatestNodeUsed = i; } + SmallVector Chains; + // The latest Node in the DAG. LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; SDLoc DL(StoreNodes[0].MemNode); @@ -10863,7 +10870,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); if (IsConstantSrc) { - StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty); + StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty); } else { SmallVector Ops; for (unsigned i = 0; i < NumStores; ++i) { @@ -10873,6 +10880,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( if (Val.getValueType() != MemVT) return false; Ops.push_back(Val); + Chains.push_back(St->getChain()); } // Build the extracted vector elements back into a vector. @@ -10892,6 +10900,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( for (unsigned i = 0; i < NumStores; ++i) { unsigned Idx = IsLE ? (NumStores - 1 - i) : i; StoreSDNode *St = cast(StoreNodes[Idx].MemNode); + Chains.push_back(St->getChain()); + SDValue Val = St->getValue(); StoreInt <<= ElementSizeBytes * 8; if (ConstantSDNode *C = dyn_cast(Val)) { @@ -10908,7 +10918,10 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } - SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, + assert(!Chains.empty()); + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, @@ -11360,6 +11373,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (NumElem < 2) return false; + // Collect the chains from all merged stores. + SmallVector MergeStoreChains; + MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain()); + // The latest Node in the DAG. unsigned LatestNodeUsed = 0; for (unsigned i=1; igetChain()); } LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; @@ -11386,12 +11405,17 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { SDLoc LoadDL(LoadNodes[0].MemNode); SDLoc StoreDL(StoreNodes[0].MemNode); + // The merged loads are required to have the same chain, so using the first's + // chain is acceptable. SDValue NewLoad = DAG.getLoad( JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign); + SDValue NewStoreChain = + DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains); + SDValue NewStore = DAG.getStore( - LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(), + NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstStoreAlign); // Replace one of the loads with the new load. diff --git a/test/CodeGen/X86/merge-store-partially-alias-loads.ll b/test/CodeGen/X86/merge-store-partially-alias-loads.ll new file mode 100644 index 00000000000..97549b65a99 --- /dev/null +++ b/test/CodeGen/X86/merge-store-partially-alias-loads.ll @@ -0,0 +1,53 @@ +; REQUIRES: asserts +; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck -check-prefix=X86 %s +; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -debug-only=isel < %s 2>&1 | FileCheck -check-prefix=DBGDAG %s + +; It's OK to merge the load / store of the first 2 components, but +; they must not be placed on the same chain after merging. + +; X86-LABEL: {{^}}merge_store_partial_overlap_load: +; X86-DAG: movw ([[BASEREG:%[a-z]+]]), [[LO2:%[a-z]+]] +; X86-DAG: movb 2([[BASEREG]]), [[HI1:%[a-z]+]] + +; X86-NEXT: movw [[LO2]], 1([[BASEREG]]) +; X86-NEXT: movb [[HI1]], 3([[BASEREG]]) +; X86-NEXT: retq + +; DBGDAG-LABEL: Optimized lowered selection DAG: BB#0 'merge_store_partial_overlap_load:' +; DBGDAG: [[ENTRYTOKEN:t[0-9]+]]: ch = EntryToken +; DBGDAG-DAG: [[TWO:t[0-9]+]]: i64 = Constant<2> +; DBGDAG-DAG: [[BASEPTR:t[0-9]+]]: i64,ch = CopyFromReg [[ENTRYTOKEN]], +; DBGDAG-DAG: [[ADDPTR:t[0-9]+]]: i64 = add [[BASEPTR]], [[TWO]] + +; DBGDAG-DAG: [[LD2:t[0-9]+]]: i16,ch = load [[ENTRYTOKEN]], [[BASEPTR]], t{{[0-9]+}} +; DBGDAG-DAG: [[LD1:t[0-9]+]]: i8,ch = load [[ENTRYTOKEN]], [[ADDPTR]], t{{[0-9]+}} + +; DBGDAG: [[LOADTOKEN:t[0-9]+]]: ch = TokenFactor [[LD2]]:1, [[LD1]]:1 + +; DBGDAG-DAG: [[ST2:t[0-9]+]]: ch = store [[LOADTOKEN]], [[LD2]], t{{[0-9]+}}, t{{[0-9]+}} +; DBGDAG-DAG: [[ST1:t[0-9]+]]: ch = store [[ST2]], [[LD1]], t{{[0-9]+}}, t{{[0-9]+}} +; DBGDAG: X86ISD::RET_FLAG [[ST1]], + +; DBGDAG: Type-legalized selection DAG: BB#0 'merge_store_partial_overlap_load:' +define void @merge_store_partial_overlap_load([4 x i8]* %tmp) { + %tmp8 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 0 + %tmp10 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 1 + %tmp12 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 2 + %tmp14 = getelementptr [4 x i8], [4 x i8]* %tmp, i32 0, i8 3 + + %tmp9 = load i8, i8* %tmp8, align 1 ; base + 0 + %tmp11 = load i8, i8* %tmp10, align 1 ; base + 1 + %tmp13 = load i8, i8* %tmp12, align 1 ; base + 2 + + store i8 %tmp9, i8* %tmp10, align 1 ; base + 1 + store i8 %tmp11, i8* %tmp12, align 1 ; base + 2 + store i8 %tmp13, i8* %tmp14, align 1 ; base + 3 + +; Should emit +; load base + 0, base + 1 +; store base + 1, base + 2 +; load base + 2 +; store base + 3 + + ret void +} -- 2.34.1