From 73282018a19a0a4d4fc8ac44d6a2040dc05fcadb Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Mon, 12 May 2014 23:00:03 +0000 Subject: [PATCH] [DAGCombiner] Split up an indexed load if only the base pointer value is live Right now the load may not get DCE'd because of the side-effect of updating the base pointer. This can happen if we lower a read-modify-write of an illegal larger type (e.g. i48) such that the modification only affects one of the subparts (the lower i32 part but not the higher i16 part). See the testcase. In order to spot the dead load we need to revisit it when SimplifyDemandedBits decided that the value of the load is masked off. This is the CommitTargetLoweringOpt piece. I checked compile time with ARM64 by sending SPEC bitcode files through llc. No measurable change. Fixes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208640 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 37 +++++++++++++++---- .../ARM64/dagcombiner-dead-indexed-load.ll | 29 +++++++++++++++ 2 files changed, 59 insertions(+), 7 deletions(-) create mode 100644 test/CodeGen/ARM64/dagcombiner-dead-indexed-load.ll diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index de715e4791e..26013cdb0b8 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -167,6 +167,7 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); + SDValue SplitIndexingFromLoad(LoadSDNode *LD); bool SliceUpLoad(SDNode *N); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); @@ -761,10 +762,14 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // If the operands of this node are only used by the node, they will now // be dead. Make sure to visit them first to delete dead nodes early. 
- for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) - if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) - AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); - + for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) { + SDNode *Op = TLO.Old.getNode()->getOperand(i).getNode(); + // For an operand generating multiple values, one of the values may + // become dead allowing further simplification (e.g. split index + // arithmetic from an indexed load). + if (Op->hasOneUse() || Op->getNumValues() > 1) + AddToWorkList(Op); + } DAG.DeleteNode(TLO.Old.getNode()); } } @@ -7844,6 +7849,17 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { return false; } +/// \brief Return the base-pointer arithmetic from an indexed \p LD. +SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { + ISD::MemIndexedMode AM = LD->getAddressingMode(); + assert(AM != ISD::UNINDEXED); + SDValue BP = LD->getOperand(1); + SDValue Inc = LD->getOperand(2); + unsigned Opc = + (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB); + return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); +} + SDValue DAGCombiner::visitLOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); SDValue Chain = LD->getChain(); @@ -7880,8 +7896,16 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } else { // Indexed loads. assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); - if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) { + if (!N->hasAnyUseOfValue(0)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); + SDValue Index; + if (N->hasAnyUseOfValue(1)) { + Index = SplitIndexingFromLoad(LD); + // Try to fold the base pointer arithmetic into subsequent loads and + // stores. 
+ AddUsersToWorkList(N); + } else + Index = DAG.getUNDEF(N->getValueType(1)); DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); dbgs() << "\nWith: "; @@ -7889,8 +7913,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { dbgs() << " and 2 other values\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), - DAG.getUNDEF(N->getValueType(1))); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); removeFromWorkList(N); DAG.DeleteNode(N); diff --git a/test/CodeGen/ARM64/dagcombiner-dead-indexed-load.ll b/test/CodeGen/ARM64/dagcombiner-dead-indexed-load.ll new file mode 100644 index 00000000000..2cf01357324 --- /dev/null +++ b/test/CodeGen/ARM64/dagcombiner-dead-indexed-load.ll @@ -0,0 +1,29 @@ +; RUN: llc -mcpu=cyclone < %s | FileCheck %s + +target datalayout = "e-i64:64-n32:64-S128" +target triple = "arm64-apple-ios" + +%"struct.SU" = type { i32, %"struct.SU"*, i32*, i32, i32, %"struct.BO", i32, [5 x i8] } +%"struct.BO" = type { %"struct.RE" } + +%"struct.RE" = type { i32, i32, i32, i32 } + +; This is a read-modify-write of some bitfields combined into an i48. It gets +; legalized into i32 and i16 accesses. Only a single store of zero to the low +; i32 part should be live. + +; CHECK-LABEL: test: +; CHECK-NOT: ldr +; CHECK: str wzr +; CHECK-NOT: str +define void @test(%"struct.SU"* nocapture %su) { +entry: + %r1 = getelementptr inbounds %"struct.SU"* %su, i64 1, i32 5 + %r2 = bitcast %"struct.BO"* %r1 to i48* + %r3 = load i48* %r2, align 8 + %r4 = and i48 %r3, -4294967296 + %r5 = or i48 0, %r4 + store i48 %r5, i48* %r2, align 8 + + ret void +} -- 2.34.1