return SDValue();
}
+// Return true is there is a nearyby consecutive load to the one provided
+// (regardless of alignment). We search up and down the chain, looking though
+// token factors and other loads (but nothing else). As a result, a true
+// results indicates that it is safe to create a new consecutive load adjacent
+// to the load provided.
+static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
+ SDValue Chain = LD->getChain();
+ EVT VT = LD->getMemoryVT();
+
+ SmallSet<SDNode *, 16> LoadRoots;
+ SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
+ SmallSet<SDNode *, 16> Visited;
+
+ // First, search up the chain, branching to follow all token-factor operands.
+ // If we find a consecutive load, then we're done, otherwise, record all
+ // nodes just above the top-level loads and token factors.
+ while (!Queue.empty()) {
+ SDNode *ChainNext = Queue.pop_back_val();
+ if (!Visited.insert(ChainNext))
+ continue;
+
+ if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+ if (DAG.isConsecutiveLoad(ChainLD, LD, VT.getStoreSize(), 1))
+ return true;
+
+ if (!Visited.count(ChainLD->getChain().getNode()))
+ Queue.push_back(ChainLD->getChain().getNode());
+ } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
+ for (SDNode::op_iterator O = ChainNext->op_begin(),
+ OE = ChainNext->op_end(); O != OE; ++O)
+ if (!Visited.count(O->getNode()))
+ Queue.push_back(O->getNode());
+ } else
+ LoadRoots.insert(ChainNext);
+ }
+
+ // Second, search down the chain, starting from the top-level nodes recorded
+ // in the first phase. These top-level nodes are the nodes just above all
+ // loads and token factors. Starting with their uses, recursively look though
+ // all loads (just the chain uses) and token factors to find a consecutive
+ // load.
+ Visited.clear();
+ Queue.clear();
+
+ for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
+ IE = LoadRoots.end(); I != IE; ++I) {
+ Queue.push_back(*I);
+
+ while (!Queue.empty()) {
+ SDNode *LoadRoot = Queue.pop_back_val();
+ if (!Visited.insert(LoadRoot))
+ continue;
+
+ if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+ if (DAG.isConsecutiveLoad(ChainLD, LD, VT.getStoreSize(), 1))
+ return true;
+
+ for (SDNode::use_iterator UI = LoadRoot->use_begin(),
+ UE = LoadRoot->use_end(); UI != UE; ++UI)
+ if (((isa<LoadSDNode>(*UI) &&
+ cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+ UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
+ Queue.push_back(*UI);
+ }
+ }
+
+ return false;
+}
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
// require the next load to appear to be aligned, even though it
// is actually offset from the base pointer by a lesser amount.
int IncOffset = VT.getSizeInBits() / 8;
- int IncValue = IncOffset - 1;
+ int IncValue = IncOffset;
+
+ // Walk (both up and down) the chain looking for another load at the real
+ // (aligned) offset (the alignment of the other load does not matter in
+ // this case). If found, then do not use the offset reduction trick, as
+ // that will prevent the loads from being later combined (as they would
+ // otherwise be duplicates).
+ if (!findConsecutiveLoad(LD, DAG))
+ --IncValue;
+
SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
- // FIXME: We might have another load (with a slightly-different
- // real offset) that we can reuse here.
SDValue ExtraLoad =
DAG.getLoad(VT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncOffset),
; RUN: llc < %s -mcpu=g5 | FileCheck %s
-; RUN: llc < %s -mcpu=g5 | FileCheck %s -check-prefix=CHECK-PC
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
br i1 %10, label %for.end, label %vector.body
; CHECK: @foo
-; CHECK: lvx [[CNST:[0-9]+]],
-; CHECK-DAG: lvsl [[PC:[0-9]+]], [[B1:[0-9]+]], [[B2:[0-9]+]]
-; CHECK-DAG: lvx [[LD1:[0-9]+]], [[B1]], [[B2]]
-; CHECK-DAG: add [[B3:[0-9]+]], [[B1]], [[B2]]
-; CHECK-DAG: lvx [[LD2:[0-9]+]], [[B3]],
+; CHECK-DAG: li [[C0:[0-9]+]], 0
+; CHECK-DAG: li [[C16:[0-9]+]], 16
+; CHECK-DAG: li [[C31:[0-9]+]], 31
+; CHECK-DAG: lvx [[CNST:[0-9]+]],
+; CHECK: .LBB0_1:
+; CHECK-DAG: lvsl [[PC:[0-9]+]], [[B1:[0-9]+]], [[C0]]
+; CHECK-DAG: lvx [[LD1:[0-9]+]], [[B1]], [[C0]]
+; CHECK-DAG: add [[B3:[0-9]+]], [[B1]], [[C0]]
+; CHECK-DAG: lvx [[LD2:[0-9]+]], [[B3]], [[C16]]
+; CHECK-DAG: lvx [[LD3:[0-9]+]], [[B3]], [[C31]]
; CHECK-DAG: vperm [[R1:[0-9]+]], [[LD1]], [[LD2]], [[PC]]
-; CHECK: vaddfp {{[0-9]+}}, [[R1]], [[CNST]]
+; CHECK-DAG: vperm [[R2:[0-9]+]], [[LD2]], [[LD3]], [[PC]]
+; CHECK-DAG: vaddfp {{[0-9]+}}, [[R1]], [[CNST]]
+; CHECK-DAG: vaddfp {{[0-9]+}}, [[R2]], [[CNST]]
; CHECK: blr
-; CHECK-PC: @foo
-; CHECK-PC: lvsl
-; CHECK-PC-NOT: lvsl
-; CHECK-PC: blr
-
for.end: ; preds = %vector.body
ret void
}