[PowerPC] Adjust load/store costs in PPCTTI

author Hal Finkel <hfinkel@anl.gov>

Fri, 4 Apr 2014 23:51:18 +0000 (23:51 +0000)

committer Hal Finkel <hfinkel@anl.gov>

Fri, 4 Apr 2014 23:51:18 +0000 (23:51 +0000)
author Hal Finkel <hfinkel@anl.gov>
Fri, 4 Apr 2014 23:51:18 +0000 (23:51 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Fri, 4 Apr 2014 23:51:18 +0000 (23:51 +0000)
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

index 53b2dd65d0f6cb4935703eeb428800f3396b33aa..ed849b5bc85948c826b3c2b528d7fa84fc393341 100644 (file)
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -216,7 +216,9 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
    // experimentally as a minimum needed to prevent unprofitable
    // vectorization for the paq8p benchmark.  It may need to be
    // raised further if other unprofitable cases remain.
-  unsigned LHSPenalty = 12;
+  unsigned LHSPenalty = 2;
+  if (ISD == ISD::INSERT_VECTOR_ELT)
+    LHSPenalty += 7;
  
    // Vector element insert/extract with Altivec is very expensive,
    // because they require store and reload with the attendant
@@ -240,14 +242,32 @@ unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
    unsigned Cost =
      TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  
-  // FIXME: Update this for VSX loads/stores that support unaligned access.
+  // VSX loads/stores support unaligned access.
+  if (ST->hasVSX()) {
+    if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
+      return Cost;
+  }
+
+  bool UnalignedAltivec =
+    Src->isVectorTy() &&
+    Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
+    LT.second.getSizeInBits() == 128 &&
+    Opcode == Instruction::Load;
  
    // PPC in general does not support unaligned loads and stores. They'll need
    // to be decomposed based on the alignment factor.
    unsigned SrcBytes = LT.second.getStoreSize();
-  if (SrcBytes && Alignment && Alignment < SrcBytes)
+  if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
      Cost += LT.first*(SrcBytes/Alignment-1);
  
+    // For a vector type, there is also scalarization overhead (only for
+    // stores, loads are expanded using the vector-load + permutation sequence,
+    // which is much less expensive).
+    if (Src->isVectorTy() && Opcode == Instruction::Store)
+      for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
+        Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
+  }
+
    return Cost;
  }
  
diff --git a/test/Analysis/CostModel/PowerPC/ext.ll b/test/Analysis/CostModel/PowerPC/ext.ll

index daaa8f5bac31f2dcd61ae7b6b73ce9d63cef5378..7d6a14e93cdf392af787bcabe12ceaed9abe66b3 100644 (file)
--- a/test/Analysis/CostModel/PowerPC/ext.ll
+++ b/test/Analysis/CostModel/PowerPC/ext.ll
@@ -13,7 +13,7 @@ define void @exts() {
    ; CHECK: cost of 1 {{.*}} sext
    %v3 = sext <4 x i16> undef to <4 x i32>
  
-  ; CHECK: cost of 216 {{.*}} sext
+  ; CHECK: cost of 112 {{.*}} sext
    %v4 = sext <8 x i16> undef to <8 x i32>
  
    ret void
diff --git a/test/Analysis/CostModel/PowerPC/insert_extract.ll b/test/Analysis/CostModel/PowerPC/insert_extract.ll

index f51963d56fde381c5dc5106916ae89b2f1ecf1fa..8dc003153a24a88b01f669e2032cf6e63bce487e 100644 (file)
--- a/test/Analysis/CostModel/PowerPC/insert_extract.ll
+++ b/test/Analysis/CostModel/PowerPC/insert_extract.ll
@@ -3,13 +3,13 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
  target triple = "powerpc64-unknown-linux-gnu"
  
  define i32 @insert(i32 %arg) {
-  ; CHECK: cost of 13 {{.*}} insertelement
+  ; CHECK: cost of 10 {{.*}} insertelement
    %x = insertelement <4 x i32> undef, i32 %arg, i32 0
    ret i32 undef
  }
  
  define i32 @extract(<4 x i32> %arg) {
-  ; CHECK: cost of 13 {{.*}} extractelement
+  ; CHECK: cost of 3 {{.*}} extractelement
    %x = extractelement <4 x i32> %arg, i32 0
    ret i32 %x
  }
diff --git a/test/Analysis/CostModel/PowerPC/load_store.ll b/test/Analysis/CostModel/PowerPC/load_store.ll

index 8145a1dc715c5f422136ec5ab80724747bb64c6b..40862780faf0860bb5e23454ada37e97080491eb 100644 (file)
--- a/test/Analysis/CostModel/PowerPC/load_store.ll
+++ b/test/Analysis/CostModel/PowerPC/load_store.ll
@@ -31,9 +31,12 @@ define i32 @loads(i32 %arg) {
  
    ; FIXME: There actually are sub-vector Altivec loads, and so we could handle
    ; this with a small expense, but we don't currently.
-  ; CHECK: cost of 60 {{.*}} load
+  ; CHECK: cost of 48 {{.*}} load
    load <4 x i16>* undef, align 2
  
+  ; CHECK: cost of 1 {{.*}} load
+  load <4 x i32>* undef, align 4
+
    ret i32 undef
  }
author	Hal Finkel <hfinkel@anl.gov>
	Fri, 4 Apr 2014 23:51:18 +0000 (23:51 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Fri, 4 Apr 2014 23:51:18 +0000 (23:51 +0000)
lib/Target/PowerPC/PPCTargetTransformInfo.cpp		patch | blob | history
test/Analysis/CostModel/PowerPC/ext.ll		patch | blob | history
test/Analysis/CostModel/PowerPC/insert_extract.ll		patch | blob | history
test/Analysis/CostModel/PowerPC/load_store.ll		patch | blob | history