From dab10b37356ae5eda7f837243f69422821ef887b Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Wed, 13 Jan 2016 00:02:40 +0000 Subject: [PATCH] [ARM] Mark VMOV with immediate: isAsCheapAsMove. VMOVs are not strictly speaking cheap, but they are as expensive as a vector copy (VORR), so we should prefer rematerialization over splitting when it applies. rdar://problem/23754176 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257545 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 7 +++- test/CodeGen/ARM/zero-cycle-zero.ll | 58 +++++++++++------------------ 2 files changed, 26 insertions(+), 39 deletions(-) diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 7020ffb41b6..defef4ea907 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5689,7 +5689,10 @@ def : NEONInstAlias<"vmov${p} $Vd, $Vm", // VMOV : Vector Move (Immediate) -let isReMaterializable = 1 in { +// Although VMOVs are not strictly speaking cheap, they are as expensive +// as their copies counterpart (VORR), so we should prefer rematerialization +// over splitting when it applies. +let isReMaterializable = 1, isAsCheapAsAMove=1 in { def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd), (ins nImmSplatI8:$SIMM), IIC_VMOVImm, "vmov", "i8", "$Vd, $SIMM", "", @@ -5744,7 +5747,7 @@ def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, "vmov", "f32", "$Vd, $SIMM", "", [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>; -} // isReMaterializable +} // isReMaterializable, isAsCheapAsAMove // Add support for bytes replication feature, so it could be GAS compatible. // E.g. 
instructions below: diff --git a/test/CodeGen/ARM/zero-cycle-zero.ll b/test/CodeGen/ARM/zero-cycle-zero.ll index 121a87f5b84..4e8696f4418 100644 --- a/test/CodeGen/ARM/zero-cycle-zero.ll +++ b/test/CodeGen/ARM/zero-cycle-zero.ll @@ -1,26 +1,19 @@ -; RUN: llc -mtriple=armv8 -mcpu=cyclone < %s | FileCheck %s --check-prefix=CHECK-CYCLONE -; RUN: llc -mtriple=armv8 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-SWIFT +; RUN: llc -mtriple=armv8 -mcpu=cyclone < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTSWIFT +; RUN: llc -mtriple=armv8 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=armv8 -mcpu=cortex-a57 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTSWIFT declare arm_aapcs_vfpcc void @take_vec64(<2 x i32>) define void @test_vec64() { -; CHECK-CYCLONE-LABEL: test_vec64: -; CHECK-SWIFT-LABEL: test_vec64: +; CHECK-LABEL: test_vec64: call arm_aapcs_vfpcc void @take_vec64(<2 x i32> <i32 0, i32 0>) call arm_aapcs_vfpcc void @take_vec64(<2 x i32> <i32 0, i32 0>) -; CHECK-CYCLONE-NOT: vmov.f64 d0, -; CHECK-CYCLONE: vmov.i32 d0, #0 -; CHECK-CYCLONE: bl -; CHECK-CYCLONE: vmov.i32 d0, #0 -; CHECK-CYCLONE: bl - -; CHECK-SWIFT: vmov.f64 [[ZEROREG:d[0-9]+]], -; CHECK-SWIFT: vmov.i32 [[ZEROREG]], #0 -; CHECK-SWIFT: vorr d0, [[ZEROREG]], [[ZEROREG]] -; CHECK-SWIFT: bl -; CHECK-SWIFT: vorr d0, [[ZEROREG]], [[ZEROREG]] -; CHECK-SWIFT: bl +; CHECK-NOTSWIFT-NOT: vmov.f64 d0, +; CHECK: vmov.i32 d0, #0 +; CHECK: bl +; CHECK: vmov.i32 d0, #0 +; CHECK: bl ret void } @@ -28,23 +21,15 @@ define void @test_vec64() { declare arm_aapcs_vfpcc void @take_vec128(<8 x i16>) define void @test_vec128() { -; CHECK-CYCLONE-LABEL: test_vec128: -; CHECK-SWIFT-LABEL: test_vec128: +; CHECK-LABEL: test_vec128: call arm_aapcs_vfpcc void @take_vec128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>) call arm_aapcs_vfpcc void @take_vec128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>) -; CHECK-CYCLONE-NOT: vmov.f64 [[ZEROREG:d[0-9]+]], -; CHECK-CYCLONE: vmov.i32 q0, #0 -; CHECK-CYCLONE: bl -; CHECK-CYCLONE: vmov.i32 q0, #0 -; CHECK-CYCLONE: bl 
- -; CHECK-SWIFT-NOT: vmov.f64 [[ZEROREG:d[0-9]+]], -; CHECK-SWIFT: vmov.i32 [[ZEROREG:q[0-9]+]], #0 -; CHECK-SWIFT: vorr q0, [[ZEROREG]], [[ZEROREG]] -; CHECK-SWIFT: bl -; CHECK-SWIFT: vorr q0, [[ZEROREG]], [[ZEROREG]] -; CHECK-SWIFT: bl +; CHECK-NOT: vmov.f64 [[ZEROREG:d[0-9]+]], +; CHECK: vmov.i32 q0, #0 +; CHECK: bl +; CHECK: vmov.i32 q0, #0 +; CHECK: bl ret void } @@ -52,16 +37,15 @@ define void @test_vec128() { declare void @take_i32(i32) define void @test_i32() { -; CHECK-CYCLONE-LABEL: test_i32: -; CHECK-SWIFT-LABEL: test_i32: +; CHECK-LABEL: test_i32: call arm_aapcs_vfpcc void @take_i32(i32 0) call arm_aapcs_vfpcc void @take_i32(i32 0) -; CHECK-CYCLONE-NOT: vmov.f64 [[ZEROREG:d[0-9]+]], -; CHECK-CYCLONE: mov r0, #0 -; CHECK-CYCLONE: bl -; CHECK-CYCLONE: mov r0, #0 -; CHECK-CYCLONE: bl +; CHECK-NOTSWIFT-NOT: vmov.f64 [[ZEROREG:d[0-9]+]], +; CHECK: mov r0, #0 +; CHECK: bl +; CHECK: mov r0, #0 +; CHECK: bl ; It doesn't particularly matter what Swift does here, there isn't carefully ; crafted behaviour that we might break in Cyclone. -- 2.34.1