[PowerPC] Use vector types for memcpy and friends (sometimes)

author Hal Finkel <hfinkel@anl.gov>

Fri, 27 Feb 2015 19:58:28 +0000 (19:58 +0000)

committer Hal Finkel <hfinkel@anl.gov>

Fri, 27 Feb 2015 19:58:28 +0000 (19:58 +0000)
author Hal Finkel <hfinkel@anl.gov>
Fri, 27 Feb 2015 19:58:28 +0000 (19:58 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Fri, 27 Feb 2015 19:58:28 +0000 (19:58 +0000)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index 147e94b560a2333032400e12179c06a8b7262a6b..7de2ae10b8b8c9783cca1b693fde8295dcfe01bd 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -892,6 +892,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
      MaxStoresPerMemcpyOptSize = 8;
      MaxStoresPerMemmove = 32;
      MaxStoresPerMemmoveOptSize = 8;
+  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
+    // The A2 also benefits from (very) aggressive inlining of memcpy and
+    // friends. The overhead of a the function call, even when warm, can be
+    // over one hundred cycles.
+    MaxStoresPerMemset = 128;
+    MaxStoresPerMemcpy = 128;
+    MaxStoresPerMemmove = 128;
    }
  }
  
@@ -10914,11 +10921,27 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                             bool IsMemset, bool ZeroMemset,
                                             bool MemcpyStrSrc,
                                             MachineFunction &MF) const {
+  const Function *F = MF.getFunction();
+  // When expanding a memset, require at least two QPX instructions to cover
+  // the cost of loading the value to be stored from the constant pool.
+  if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
+     (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+      !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+    return MVT::v4f64;
+  }
+
+  // We should use Altivec/VSX loads and stores when available. For unaligned
+  // addresses, unaligned VSX loads are only fast starting with the P8.
+  if (Subtarget.hasAltivec() && Size >= 16 &&
+      (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
+       ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+    return MVT::v4i32;
+
    if (Subtarget.isPPC64()) {
      return MVT::i64;
-  } else {
-    return MVT::i32;
    }
+
+  return MVT::i32;
  }
  
  /// \brief Returns true if it is beneficial to convert a load of a constant
diff --git a/test/CodeGen/PowerPC/memcpy-vec.ll b/test/CodeGen/PowerPC/memcpy-vec.ll

new file mode 100644 (file)

index 0000000..70b8ea9
--- /dev/null
+++ b/test/CodeGen/PowerPC/memcpy-vec.ll
@@ -0,0 +1,110 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PWR7
+; RUN: llc -mcpu=pwr8 < %s | FileCheck %s -check-prefix=PWR8
+; RUN: llc -mcpu=a2q < %s | FileCheck %s -check-prefix=A2Q
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo1(double* nocapture %x, double* nocapture readonly %y) #0 {
+entry:
+  %0 = bitcast double* %x to i8*
+  %1 = bitcast double* %y to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 8, i1 false)
+  ret void
+
+; PWR7-LABEL: @foo1
+; PWR7-NOT: bl memcpy
+; PWR7: ld {{[0-9]+}}, {{[0-9]+}}(4)
+; PWR7: std {{[0-9]+}}, {{[0-9]+}}(3)
+; PWR7: blr
+
+; PWR8-LABEL: @foo1
+; PWR8: lxvw4x
+; PWR8: stxvw4x
+; PWR8: blr
+
+; A2Q-LABEL: @foo1
+; A2Q-NOT: bl memcpy
+; A2Q: ld {{[0-9]+}}, {{[0-9]+}}(4)
+; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
+; A2Q: blr
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
+
+; Function Attrs: nounwind
+define void @foo2(double* nocapture %x, double* nocapture readonly %y) #0 {
+entry:
+  %0 = bitcast double* %x to i8*
+  %1 = bitcast double* %y to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 128, i32 8, i1 false)
+  ret void
+
+; PWR7-LABEL: @foo2
+; PWR7: bl memcpy
+; PWR7: blr
+
+; PWR8-LABEL: @foo2
+; PWR8: lxvw4x
+; PWR8: stxvw4x
+; PWR8: blr
+
+; A2Q-LABEL: @foo2
+; A2Q-NOT: bl memcpy
+; A2Q: ld {{[0-9]+}}, {{[0-9]+}}(4)
+; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
+; A2Q: blr
+}
+
+; Function Attrs: nounwind
+define void @bar1(double* nocapture %x) #0 {
+entry:
+  %0 = bitcast double* %x to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 128, i32 8, i1 false)
+  ret void
+
+; PWR7-LABEL: @bar1
+; PWR7-NOT: bl memset
+; PWR7: stxvw4x
+; PWR7: blr
+
+; PWR8-LABEL: @bar1
+; PWR8-NOT: bl memset
+; PWR8: stxvw4x
+; PWR8: blr
+
+; A2Q-LABEL: @bar1
+; A2Q-NOT: bl memset
+; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
+; A2Q: blr
+}
+
+; Function Attrs: nounwind
+define void @bar2(double* nocapture %x) #0 {
+entry:
+  %0 = bitcast double* %x to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 128, i32 32, i1 false)
+  ret void
+
+; PWR7-LABEL: @bar2
+; PWR7-NOT: bl memset
+; PWR7: stxvw4x
+; PWR7: blr
+
+; PWR8-LABEL: @bar2
+; PWR8-NOT: bl memset
+; PWR8: stxvw4x
+; PWR8: blr
+
+; A2Q-LABEL: @bar2
+; A2Q-NOT: bl memset
+; A2Q: qvstfdx
+; A2Q: blr
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/ppc64-byval-align.ll b/test/CodeGen/PowerPC/ppc64-byval-align.ll

index a4d5c2a72d4c28129fca8c748f6a238be9cdad3a..be1c502270074c61404f18cc8b46533f8de43e41 100644 (file)
--- a/test/CodeGen/PowerPC/ppc64-byval-align.ll
+++ b/test/CodeGen/PowerPC/ppc64-byval-align.ll
@@ -50,7 +50,9 @@ entry:
  }
  ; CHECK-LABEL: @caller2
  ; CHECK: std 3, [[OFF:[0-9]+]](1)
-; CHECK: ld [[REG:[0-9]+]], [[OFF]](1)
-; CHECK: std [[REG]], 128(1)
+; CHECK: addi [[REG1:[0-9]+]], 1, [[OFF]]
+; CHECK: lxvw4x [[REG2:[0-9]+]], 0, [[REG1]]
+; CHECK: li [[REG3:[0-9]+]], 128
+; CHECK: stxvw4x 0, 1, [[REG3]]
  ; CHECK: bl test2
author	Hal Finkel <hfinkel@anl.gov>
	Fri, 27 Feb 2015 19:58:28 +0000 (19:58 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Fri, 27 Feb 2015 19:58:28 +0000 (19:58 +0000)
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
test/CodeGen/PowerPC/memcpy-vec.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/PowerPC/ppc64-byval-align.ll		patch \| blob \| history