From 0f1a21bcb8c51df494a58f519df9fc901bff585d Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Mon, 2 Feb 2015 17:55:57 +0000 Subject: [PATCH] [AArch64] Prefer DUP/MOV ("CPY") to INS for vector_extract. This avoids a partial false dependency on the previous content of the upper lanes of the destination vector register. Differential Revision: http://reviews.llvm.org/D7307 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227820 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrInfo.td | 20 ++++++------------- ...arm64-fixed-point-scalar-cvt-dagcombine.ll | 2 +- test/CodeGen/AArch64/arm64-neon-copy.ll | 2 +- test/CodeGen/AArch64/fp16-v8-instructions.ll | 8 ++++---- test/CodeGen/AArch64/neon-scalar-copy.ll | 8 ++++---- 5 files changed, 16 insertions(+), 24 deletions(-) diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index e0fb90a9f62..6e4c0b006f2 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -3716,29 +3716,21 @@ defm : Neon_INS_elt_pattern; // Floating point vector extractions are codegen'd as either a sequence of -// subregister extractions, possibly fed by an INS if the lane number is -// anything other than zero. +// subregister extractions, or a MOV (aka CPY here, alias for DUP) if +// the lane number is anything other than zero. def : Pat<(vector_extract (v2f64 V128:$Rn), 0), (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>; def : Pat<(vector_extract (v4f32 V128:$Rn), 0), (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>; def : Pat<(vector_extract (v8f16 V128:$Rn), 0), (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>; + def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx), - (f64 (EXTRACT_SUBREG - (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0, - V128:$Rn, VectorIndexD:$idx), - dsub))>; + (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>; def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), - (f32 (EXTRACT_SUBREG - (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0, - V128:$Rn, VectorIndexS:$idx), - ssub))>; + (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>; def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx), - (f16 (EXTRACT_SUBREG - (INSvi16lane (v8f16 (IMPLICIT_DEF)), 0, - V128:$Rn, VectorIndexH:$idx), - hsub))>; + (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>; // All concat_vectors operations are canonicalised to act on i64 vectors for // AArch64. In the general case we need an instruction, which had just as well be diff --git a/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll index e51c38b2b95..e41e19e50ee 100644 --- a/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll +++ b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll @@ -6,7 +6,7 @@ ; rdar://11855286 define double @foo0(<2 x i64> %a) nounwind { ; CHECK: scvtf.2d [[REG:v[0-9]+]], v0, #9 -; CHECK-NEXT: ins.d v0[0], [[REG]][1] +; CHECK-NEXT: mov d0, [[REG]][1] %vecext = extractelement <2 x i64> %a, i32 1 %fcvt_n = tail call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %vecext, i32 9) ret double %fcvt_n diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll index 1cfba826d51..4a92c3d49c1 100644 --- a/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -188,7 +188,7 @@ define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) { define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) { ; CHECK-LABEL: ins2f1: -; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +; CHECK: mov {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp3 = extractelement <2 x double> %tmp1, i32 1 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0 ret <1 x double> %tmp4 diff --git a/test/CodeGen/AArch64/fp16-v8-instructions.ll b/test/CodeGen/AArch64/fp16-v8-instructions.ll index 9ee2296ace8..b75f1605335 100644 --- a/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -188,10 +188,10 @@ define <8 x half> @s_to_h(<8 x float> %a) { define <8 x half> @d_to_h(<8 x double> %a) { ; CHECK-LABEL: d_to_h: -; CHECK-DAG: ins v{{[0-9]+}}.d -; CHECK-DAG: ins v{{[0-9]+}}.d -; CHECK-DAG: ins v{{[0-9]+}}.d -; CHECK-DAG: ins v{{[0-9]+}}.d +; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1] +; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1] +; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1] +; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1] ; CHECK-DAG: fcvt h ; CHECK-DAG: fcvt h ; CHECK-DAG: fcvt h diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll index cd6aead1b50..3f770600ac5 100644 --- a/test/CodeGen/AArch64/neon-scalar-copy.ll +++ b/test/CodeGen/AArch64/neon-scalar-copy.ll @@ -2,7 +2,7 @@ define float @test_dup_sv2S(<2 x float> %v) #0 { ; CHECK-LABEL: test_dup_sv2S: - ; CHECK-NEXT: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] + ; CHECK-NEXT: mov s{{[0-9]+}}, {{v[0-9]+}}.s[1] ; CHECK-NEXT: ret %tmp1 = extractelement <2 x float> %v, i32 1 ret float %tmp1 @@ -19,7 +19,7 @@ define float @test_dup_sv2S_0(<2 x float> %v) #0 { define float @test_dup_sv4S(<4 x float> %v) #0 { ; CHECK-LABEL: test_dup_sv4S: - ; CHECK-NEXT: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] + ; CHECK-NEXT: mov s{{[0-9]+}}, {{v[0-9]+}}.s[1] ; CHECK-NEXT: ret %tmp1 = extractelement <4 x float> %v, i32 1 ret float %tmp1 @@ -45,7 +45,7 @@ define double @test_dup_dvD(<1 x double> %v) #0 { define double @test_dup_dv2D(<2 x double> %v) #0 { ; CHECK-LABEL: test_dup_dv2D: - ; CHECK-NEXT: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] + ; CHECK-NEXT: mov d{{[0-9]+}}, {{v[0-9]+}}.d[1] ; CHECK-NEXT: ret %tmp1 = extractelement <2 x double> %v, i32 1 ret double %tmp1 @@ -62,7 +62,7 @@ define double @test_dup_dv2D_0(<2 x double> %v) #0 { define half @test_dup_hv8H(<8 x half> %v) #0 { ; CHECK-LABEL: test_dup_hv8H: - ; CHECK-NEXT: ins {{v[0-9]+}}.h[0], {{v[0-9]+}}.h[1] + ; CHECK-NEXT: mov h{{[0-9]+}}, {{v[0-9]+}}.h[1] ; CHECK-NEXT: ret %tmp1 = extractelement <8 x half> %v, i32 1 ret half %tmp1 -- 2.34.1