Prefer to duplicate PPC Altivec loads when expanding unaligned loads

author Hal Finkel <hfinkel@anl.gov>

Sun, 26 May 2013 18:08:30 +0000 (18:08 +0000)

committer Hal Finkel <hfinkel@anl.gov>

Sun, 26 May 2013 18:08:30 +0000 (18:08 +0000)
author Hal Finkel <hfinkel@anl.gov>
Sun, 26 May 2013 18:08:30 +0000 (18:08 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Sun, 26 May 2013 18:08:30 +0000 (18:08 +0000)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index f47376cc51b90ead7bcc66060739259caabcf5f6..87b63a8f0f6837ead05e97e9e73178b2180eb857 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6781,6 +6781,75 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
    return SDValue();
  }
  
+// Return true if there is a nearby consecutive load to the one provided
+// (regardless of alignment). We search up and down the chain, looking through
+// token factors and other loads (but nothing else). As a result, a true
+// result indicates that it is safe to create a new consecutive load adjacent
+// to the load provided.
+static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
+  SDValue Chain = LD->getChain();
+  EVT VT = LD->getMemoryVT();
+
+  SmallSet<SDNode *, 16> LoadRoots; // starting points for phase two
+  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
+  SmallSet<SDNode *, 16> Visited; // cleared between the two phases
+
+  // First, search up the chain, branching to follow all token-factor operands.
+  // If we find a consecutive load, then we're done; otherwise, record all
+  // nodes just above the top-level loads and token factors.
+  while (!Queue.empty()) {
+    SDNode *ChainNext = Queue.pop_back_val();
+    if (!Visited.insert(ChainNext))
+      continue;
+
+    if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+      if (DAG.isConsecutiveLoad(ChainLD, LD, VT.getStoreSize(), 1))
+        return true;
+
+      if (!Visited.count(ChainLD->getChain().getNode()))
+        Queue.push_back(ChainLD->getChain().getNode());
+    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
+      for (SDNode::op_iterator O = ChainNext->op_begin(),
+           OE = ChainNext->op_end(); O != OE; ++O)
+        if (!Visited.count(O->getNode()))
+          Queue.push_back(O->getNode());
+    } else
+      LoadRoots.insert(ChainNext); // neither a load nor a token factor
+  }
+
+  // Second, search down the chain, starting from the top-level nodes recorded
+  // in the first phase. These top-level nodes are the nodes just above all
+  // loads and token factors. Starting with their uses, recursively look through
+  // all loads (just the chain uses) and token factors to find a consecutive
+  // load.
+  Visited.clear();
+  Queue.clear();
+
+  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
+       IE = LoadRoots.end(); I != IE; ++I) {
+    Queue.push_back(*I);
+       
+    while (!Queue.empty()) {
+      SDNode *LoadRoot = Queue.pop_back_val();
+      if (!Visited.insert(LoadRoot))
+        continue;
+
+      if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+        if (DAG.isConsecutiveLoad(ChainLD, LD, VT.getStoreSize(), 1))
+          return true;
+
+      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
+           UE = LoadRoot->use_end(); UI != UE; ++UI)
+        if (((isa<LoadSDNode>(*UI) && // a load user counts only via its chain
+            cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+            UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
+          Queue.push_back(*UI);
+    }
+  }
+
+  return false; // neither phase found a consecutive load
+}
+
  SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
    const TargetMachine &TM = getTargetMachine();
@@ -7015,12 +7084,19 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
        // require the next load to appear to be aligned, even though it
        // is actually offset from the base pointer by a lesser amount.
        int IncOffset = VT.getSizeInBits() / 8;
-      int IncValue = IncOffset - 1;
+      int IncValue = IncOffset;
+
+      // Walk (both up and down) the chain looking for another load at the real
+      // (aligned) offset (the alignment of the other load does not matter in
+      // this case). If found, then do not use the offset reduction trick, as
+      // that will prevent the loads from being later combined (as they would
+      // otherwise be duplicates).
+      if (!findConsecutiveLoad(LD, DAG))
+        --IncValue;
+
        SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
        Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
  
-      // FIXME: We might have another load (with a slightly-different
-      // real offset) that we can reuse here.
        SDValue ExtraLoad =
          DAG.getLoad(VT, dl, Chain, Ptr,
                      LD->getPointerInfo().getWithOffset(IncOffset),
diff --git a/test/CodeGen/PowerPC/unal-altivec.ll b/test/CodeGen/PowerPC/unal-altivec.ll

index dec93ebf769dd5eef43525aa065ff9adb10be30f..7f333a1c508b43ffb84bddae051a0a6f956d7b90 100644 (file)
--- a/test/CodeGen/PowerPC/unal-altivec.ll
+++ b/test/CodeGen/PowerPC/unal-altivec.ll
@@ -1,5 +1,4 @@
  ; RUN: llc < %s -mcpu=g5 | FileCheck %s
-; RUN: llc < %s -mcpu=g5 | FileCheck %s -check-prefix=CHECK-PC
  target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
  target triple = "powerpc64-unknown-linux-gnu"
  
@@ -30,20 +29,22 @@ vector.body:                                      ; preds = %vector.body, %vecto
    br i1 %10, label %for.end, label %vector.body
  
  ; CHECK: @foo
-; CHECK: lvx [[CNST:[0-9]+]],
-; CHECK-DAG: lvsl [[PC:[0-9]+]], [[B1:[0-9]+]], [[B2:[0-9]+]]
-; CHECK-DAG: lvx [[LD1:[0-9]+]], [[B1]], [[B2]]
-; CHECK-DAG: add [[B3:[0-9]+]], [[B1]], [[B2]]
-; CHECK-DAG: lvx [[LD2:[0-9]+]], [[B3]],
+; CHECK-DAG: li [[C0:[0-9]+]], 0
+; CHECK-DAG: li [[C16:[0-9]+]], 16
+; CHECK-DAG: li [[C31:[0-9]+]], 31
+; CHECK-DAG: lvx [[CNST:[0-9]+]],
+; CHECK: .LBB0_1:
+; CHECK-DAG: lvsl [[PC:[0-9]+]], [[B1:[0-9]+]], [[C0]]
+; CHECK-DAG: lvx [[LD1:[0-9]+]], [[B1]], [[C0]]
+; CHECK-DAG: add [[B3:[0-9]+]], [[B1]], [[C0]]
+; CHECK-DAG: lvx [[LD2:[0-9]+]], [[B3]], [[C16]]
+; CHECK-DAG: lvx [[LD3:[0-9]+]], [[B3]], [[C31]]
  ; CHECK-DAG: vperm [[R1:[0-9]+]], [[LD1]], [[LD2]], [[PC]]
-; CHECK: vaddfp {{[0-9]+}}, [[R1]], [[CNST]]
+; CHECK-DAG: vperm [[R2:[0-9]+]], [[LD2]], [[LD3]], [[PC]]
+; CHECK-DAG: vaddfp {{[0-9]+}}, [[R1]], [[CNST]]
+; CHECK-DAG: vaddfp {{[0-9]+}}, [[R2]], [[CNST]]
  ; CHECK: blr
  
-; CHECK-PC: @foo
-; CHECK-PC: lvsl
-; CHECK-PC-NOT: lvsl
-; CHECK-PC: blr
-
  for.end:                                          ; preds = %vector.body
    ret void
  }
author	Hal Finkel <hfinkel@anl.gov>
	Sun, 26 May 2013 18:08:30 +0000 (18:08 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Sun, 26 May 2013 18:08:30 +0000 (18:08 +0000)
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
test/CodeGen/PowerPC/unal-altivec.ll		patch \| blob \| history