From 3614662adbe10c4e83338a23e020cd4874c92320 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 29 Jul 2015 21:34:32 +0000 Subject: [PATCH] AArch64: use 32-bit MOV rather than UBFX to truncate registers. It's potentially more efficient on Cyclone, and, judging from the optimization guides & schedulers, it looks like it has no effect on Cortex-A53 or A57. In general you'd expect a MOV to be about the most efficient instruction with its semantics, even though the official "UXTW" alias is really a UBFX. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243576 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrInfo.td | 6 +++--- .../AArch64/aarch64-dynamic-stack-layout.ll | 20 +++++++++---------- test/CodeGen/AArch64/arm64-aapcs.ll | 4 ++-- test/CodeGen/AArch64/arm64-popcnt.ll | 4 ++-- test/CodeGen/AArch64/bitfield.ll | 2 +- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 867b95b566a..48ac7b8f681 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -5148,10 +5148,10 @@ def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>; def : Pat<(i64 (anyext GPR32:$src)), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>; -// When we need to explicitly zero-extend, we use an unsigned bitfield move -// instruction (UBFM) on the enclosing super-reg. +// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and +// then assert the extension has happened. def : Pat<(i64 (zext GPR32:$src)), - (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; + (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>; // To sign extend, we use a signed bitfield move instruction (SBFM) on the // containing super-reg. 
diff --git a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll index 739570236da..83b9d0a30ae 100644 --- a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll +++ b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll @@ -252,11 +252,11 @@ entry: ; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack -; CHECK: ubfx x9, x0, #0, #32 +; CHECK: mov w9, w0 +; CHECK: mov x10, sp ; CHECK: lsl x9, x9, #2 ; CHECK: add x9, x9, #15 ; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through frame pointer @@ -299,11 +299,11 @@ entry: ; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack -; CHECK: ubfx x9, x0, #0, #32 +; CHECK: mov w9, w0 +; CHECK: mov x10, sp ; CHECK: lsl x9, x9, #2 ; CHECK: add x9, x9, #15 ; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through frame pointer @@ -361,11 +361,11 @@ entry: ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). -; CHECK: ubfx x9, x0, #0, #32 +; CHECK: mov w9, w0 +; CHECK: mov x10, sp ; CHECK: lsl x9, x9, #2 ; CHECK: add x9, x9, #15 ; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through base pointer @@ -414,11 +414,11 @@ entry: ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). 
-; CHECK: ubfx x9, x0, #0, #32 +; CHECK: mov w9, w0 +; CHECK: mov x10, sp ; CHECK: lsl x9, x9, #2 ; CHECK: add x9, x9, #15 ; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through base pointer @@ -465,11 +465,11 @@ entry: ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). -; CHECK: ubfx x9, x0, #0, #32 +; CHECK: mov w9, w0 +; CHECK: mov x10, sp ; CHECK: lsl x9, x9, #2 ; CHECK: add x9, x9, #15 ; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through base pointer diff --git a/test/CodeGen/AArch64/arm64-aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll index d0880cd4f3e..f345acf453d 100644 --- a/test/CodeGen/AArch64/arm64-aapcs.ll +++ b/test/CodeGen/AArch64/arm64-aapcs.ll @@ -78,8 +78,8 @@ define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) { %ext_int = zext i32 %int to i64 store volatile i64 %ext_int, i64* @var64 -; CHECK: ubfx [[EXT:x[0-9]+]], x3, #0, #32 -; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] +; CHECK: mov w[[EXT:[0-9]+]], w3 +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] ret void } diff --git a/test/CodeGen/AArch64/arm64-popcnt.ll b/test/CodeGen/AArch64/arm64-popcnt.ll index b0b529a13f4..d6c9471b7a1 100644 --- a/test/CodeGen/AArch64/arm64-popcnt.ll +++ b/test/CodeGen/AArch64/arm64-popcnt.ll @@ -4,8 +4,8 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone { %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) ret i32 %cnt -; CHECK: ubfx x{{[0-9]+}} -; CHECK: fmov d0, x{{[0-9]+}} +; CHECK: mov w[[IN64:[0-9]+]], w0 +; CHECK: fmov d0, x[[IN64]] ; CHECK: cnt.8b v0, v0 ; CHECK: uaddlv.8b h0, v0 ; CHECK: fmov w0, s0 diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll index 
78399c80b5d..e1e4f62f662 100644 --- a/test/CodeGen/AArch64/bitfield.ll +++ b/test/CodeGen/AArch64/bitfield.ll @@ -60,7 +60,7 @@ define void @test_extendw(i32 %var) { %uxt64 = zext i32 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #32 +; CHECK: mov {{w[0-9]+}}, w0 ret void } -- 2.34.1