From abdf75511be5d0b4239157a5fd86c3c84e10e5bf Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Tue, 25 Dec 2012 18:51:18 +0000 Subject: [PATCH] Loosen scheduling restrictions on the PPC dcbt intrinsic As with the prefetch intrinsic to which it maps, simply have dcbt marked as reading from and writing to its arguments instead of having unmodeled side effects. While this might cause unwanted code motion (because aliasing checks don't really capture cache-line sharing), it is more important that prefetches in unrolled loops don't block the scheduler from rearranging the unrolled loop body. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171073 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IntrinsicsPowerPC.td | 3 ++- test/CodeGen/PowerPC/dcbt-sched.ll | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/PowerPC/dcbt-sched.ll diff --git a/include/llvm/IntrinsicsPowerPC.td b/include/llvm/IntrinsicsPowerPC.td index da85bfba863..cde39ccd3c5 100644 --- a/include/llvm/IntrinsicsPowerPC.td +++ b/include/llvm/IntrinsicsPowerPC.td @@ -22,7 +22,8 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". def int_ppc_dcbf : Intrinsic<[], [llvm_ptr_ty], []>; def int_ppc_dcbi : Intrinsic<[], [llvm_ptr_ty], []>; def int_ppc_dcbst : Intrinsic<[], [llvm_ptr_ty], []>; - def int_ppc_dcbt : Intrinsic<[], [llvm_ptr_ty], []>; + def int_ppc_dcbt : Intrinsic<[], [llvm_ptr_ty], + [IntrReadWriteArgMem, NoCapture<0>]>; def int_ppc_dcbtst: Intrinsic<[], [llvm_ptr_ty], []>; def int_ppc_dcbz : Intrinsic<[], [llvm_ptr_ty], []>; def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>; diff --git a/test/CodeGen/PowerPC/dcbt-sched.ll b/test/CodeGen/PowerPC/dcbt-sched.ll new file mode 100644 index 00000000000..dfa1b75bd7d --- /dev/null +++ b/test/CodeGen/PowerPC/dcbt-sched.ll @@ -0,0 +1,22 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" +; RUN: llc -mcpu=a2 -enable-misched -enable-aa-sched-mi < %s | FileCheck %s + +define i8 @test1(i8* noalias %a, i8* noalias %b, i8* noalias %c) nounwind { +entry: + %q = load i8* %b + call void @llvm.prefetch(i8* %a, i32 0, i32 3, i32 1) + %r = load i8* %c + %s = add i8 %q, %r + ret i8 %s +} + +declare void @llvm.prefetch(i8*, i32, i32, i32) + +; Test that we've moved the second load to before the dcbt to better +; hide its latency. +; CHECK: @test1 +; CHECK: lbz +; CHECK: lbz +; CHECK: dcbt + -- 2.34.1