From 823b3b2fdb3c875231d5f2a11a7a771d8498f7af Mon Sep 17 00:00:00 2001 From: Geoff Berry Date: Wed, 6 Jan 2016 18:14:26 +0000 Subject: [PATCH] ScheduleDAGInstrs: Bug fix for missed memory dependency. Summary: In buildSchedGraph(), when adding memory dependencies for loads, move the call to adjustChainDeps() after the call to addChainDependency(AliasChain) to handle the case where addChainDependency(AliasChain) ends up not adding a dependency and instead putting the SU on the RejectMemNodes list. The call to adjustChainDeps() must be done after the call to addChainDependency() in order to process the SU added to the RejectMemNodes list to create memory dependencies for it. Reviewers: hfinkel, atrick, jonpa, resistor Subscribers: mcrosier, llvm-commits Differential Revision: http://reviews.llvm.org/D15927 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256950 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ScheduleDAGInstrs.cpp | 15 ++++++++++--- .../AArch64/arm64-misched-memdep-bug.ll | 22 +++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/AArch64/arm64-misched-memdep-bug.ll diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index fb82ab7a555..ea2361bfcb3 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1005,6 +1005,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes, TrueMemOrderLatency); } + // This call must come after calls to addChainDependency() since it + // consumes the 'RejectMemNodes' list that addChainDependency() possibly + // adds to. adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); PendingLoads.clear(); @@ -1086,6 +1089,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, RejectMemNodes); } + // This call must come after calls to addChainDependency() since it + // consumes the 'RejectMemNodes' list that addChainDependency() possibly + // adds to. adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); } else if (MI->mayLoad()) { @@ -1133,13 +1139,16 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, else NonAliasMemUses[V].push_back(SU); } - if (MayAlias) - adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, - RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, RejectMemNodes); + if (MayAlias) + // This call must come after calls to addChainDependency() since it + // consumes the 'RejectMemNodes' list that addChainDependency() + // possibly adds to. + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, + RejectMemNodes, /*Latency=*/0); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); } diff --git a/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll b/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll new file mode 100644 index 00000000000..770521b7528 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll @@ -0,0 +1,22 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s +; +; Test for bug in misched memory dependency calculation. +; +; CHECK: ********** MI Scheduling ********** +; CHECK: misched_bug:BB#0 entry +; CHECK: SU(2): %vreg2 = LDRWui %vreg0, 1; mem:LD4[%ptr1_plus1] GPR32:%vreg2 GPR64common:%vreg0 +; CHECK: Successors: +; CHECK-NEXT: val SU(5): Latency=4 Reg=%vreg2 +; CHECK-NEXT: ch SU(4): Latency=0 +; CHECK: SU(4): STRWui %WZR, %vreg1, 0; mem:ST4[%ptr2] GPR64common:%vreg1 +; CHECK: SU(5): %W0 = COPY %vreg2; GPR32:%vreg2 +; CHECK: ** ScheduleDAGMI::schedule picking next node +define i32 @misched_bug(i32* %ptr1, i32* %ptr2) { +entry: + %ptr1_plus1 = getelementptr inbounds i32, i32* %ptr1, i64 1 + %val1 = load i32, i32* %ptr1_plus1, align 4 + store i32 0, i32* %ptr1, align 4 + store i32 0, i32* %ptr2, align 4 + ret i32 %val1 +} -- 2.34.1