// Lower MemTransferInst or load-store pair to loop
static void convertTransferToLoop(
Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
- //unsigned numLoads,
bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
Type *indType = len->getType();
ind->addIncoming(ConstantInt::get(indType, 0), origBB);
// load from srcAddr+ind
+ // TODO: we can leverage the align parameter of llvm.memcpy for more efficient
+ // word-sized loads and stores.
Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind),
srcVolatile);
  // store at dstAddr+ind
  loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind),
                   dstVolatile);
}
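For the TODO above, a rough sketch of what word-sized copying inside this loop body could look like, assuming the caller has already verified that the memcpy's align parameter is at least 4 and that len is a multiple of 4; wordLen, wordSrc, and wordDst are illustrative names and are not part of this patch:

  // Illustrative sketch only: copy i32 words instead of bytes when alignment
  // permits. Assumes align >= 4 and len % 4 == 0 were checked beforehand;
  // this function does not currently receive the align parameter.
  Value *wordLen = loop.CreateLShr(len, 2); // len / 4 loop iterations
  Value *wordSrc = loop.CreateBitCast(srcAddr, loop.getInt32Ty()->getPointerTo());
  Value *wordDst = loop.CreateBitCast(dstAddr, loop.getInt32Ty()->getPointerTo());
  Value *word = loop.CreateLoad(loop.CreateGEP(loop.getInt32Ty(), wordSrc, ind),
                                srcVolatile);
  loop.CreateStore(word, loop.CreateGEP(loop.getInt32Ty(), wordDst, ind),
                   dstVolatile);
  // The trip-count comparison would then test ind against wordLen, not len.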
for (MemTransferInst *cpy : aggrMemcpys) {
- Value *len = cpy->getLength();
- // llvm 2.7 version of memcpy does not have volatile
- // operand yet. So always making it non-volatile
- // optimistically, so that we don't see unnecessary
- // st.volatile in ptx
- convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false,
- false, Context, F);
+ convertTransferToLoop(/* splitAt */ cpy,
+ /* srcAddr */ cpy->getSource(),
+ /* dstAddr */ cpy->getDest(),
+ /* len */ cpy->getLength(),
+ /* srcVolatile */ cpy->isVolatile(),
+ /* dstVolatile */ cpy->isVolatile(),
+ /* Context */ Context,
+ /* F */ F);
cpy->eraseFromParent();
}
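For context, aggrMemcpys is populated elsewhere in this pass and is not part of this hunk; a minimal sketch of such a collection step, where the walk itself is an assumption rather than quoted code:

  // Sketch: gather the memcpy/memmove intrinsics up front, since the rewrite
  // loop above splits blocks and erases calls, which would invalidate a live
  // instruction iterator. Assumes llvm/ADT/SmallVector.h and
  // llvm/IR/IntrinsicInst.h are available.
  SmallVector<MemTransferInst *, 4> aggrMemcpys;
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      if (auto *cpy = dyn_cast<MemTransferInst>(&I))
        aggrMemcpys.push_back(cpy);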
; CHECK-NEXT: @%p[[PRED]] bra LBB[[LABEL]]
}
+define i8* @memcpy_volatile_caller(i8* %dst, i8* %src, i64 %n) #0 {
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i32 1, i1 true)
+ ret i8* %dst
+; CHECK-LABEL: .visible .func (.param .b32 func_retval0) memcpy_volatile_caller
+; CHECK: LBB[[LABEL:[_0-9]+]]:
+; CHECK: ld.volatile.u8 %rs[[REG:[0-9]+]]
+; CHECK: st.volatile.u8 [%r{{[0-9]+}}], %rs[[REG]]
+; CHECK: add.s64 %rd[[COUNTER:[0-9]+]], %rd[[COUNTER]], 1
+; CHECK-NEXT: setp.lt.u64 %p[[PRED:[0-9]+]], %rd[[COUNTER]], %rd
+; CHECK-NEXT: @%p[[PRED]] bra LBB[[LABEL]]
+}
+
define i8* @memset_caller(i8* %dst, i32 %c, i64 %n) #0 {
entry:
%0 = trunc i32 %c to i8