[AArch64] Prefer DUP/MOV ("CPY") to INS for vector_extract.

author Ahmed Bougacha <ahmed.bougacha@gmail.com>

Mon, 2 Feb 2015 17:55:57 +0000 (17:55 +0000)

committer Ahmed Bougacha <ahmed.bougacha@gmail.com>

Mon, 2 Feb 2015 17:55:57 +0000 (17:55 +0000)
author Ahmed Bougacha <ahmed.bougacha@gmail.com>
Mon, 2 Feb 2015 17:55:57 +0000 (17:55 +0000)
committer Ahmed Bougacha <ahmed.bougacha@gmail.com>
Mon, 2 Feb 2015 17:55:57 +0000 (17:55 +0000)
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td

index e0fb90a9f6210c8809e747a7d382c87660646938..6e4c0b006f28f8c37be886c1d560254a8b568e36 100644 (file)
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3716,29 +3716,21 @@ defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi32lane>;
  
  
  // Floating point vector extractions are codegen'd as either a sequence of
-// subregister extractions, possibly fed by an INS if the lane number is
-// anything other than zero.
+// subregister extractions, or a MOV (aka CPY here, alias for DUP) if
+// the lane number is anything other than zero.
  def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
            (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
  def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
            (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
  def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
            (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
+
  def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
-          (f64 (EXTRACT_SUBREG
-            (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0,
-                         V128:$Rn, VectorIndexD:$idx),
-            dsub))>;
+          (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>;
  def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
-          (f32 (EXTRACT_SUBREG
-            (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0,
-                         V128:$Rn, VectorIndexS:$idx),
-            ssub))>;
+          (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>;
  def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
-          (f16 (EXTRACT_SUBREG
-            (INSvi16lane (v8f16 (IMPLICIT_DEF)), 0,
-                         V128:$Rn, VectorIndexH:$idx),
-            hsub))>;
+          (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;
  
  // All concat_vectors operations are canonicalised to act on i64 vectors for
  // AArch64. In the general case we need an instruction, which had just as well be
diff --git a/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll

index e51c38b2b95e876e02845404c6b6dcf2e2a005ce..e41e19e50eea1f13e4cc24e098b3e89a9cbf4cb7 100644 (file)
--- a/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
+++ b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
@@ -6,7 +6,7 @@
  ; rdar://11855286
  define double @foo0(<2 x i64> %a) nounwind {
  ; CHECK:  scvtf.2d  [[REG:v[0-9]+]], v0, #9
-; CHECK-NEXT:  ins.d v0[0], [[REG]][1]
+; CHECK-NEXT:  mov  d0, [[REG]][1]
    %vecext = extractelement <2 x i64> %a, i32 1
    %fcvt_n = tail call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %vecext, i32 9)
    ret double %fcvt_n
diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll

index 1cfba826d510fd13e4a9d9270ea1f8ffd8af5bc4..4a92c3d49c159c1679a65adce3fc9ec2c47f58e3 100644 (file)
--- a/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -188,7 +188,7 @@ define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
  
  define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
  ; CHECK-LABEL: ins2f1:
-; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK: mov {{d[0-9]+}}, {{v[0-9]+}}.d[1]
    %tmp3 = extractelement <2 x double> %tmp1, i32 1
    %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
    ret <1 x double> %tmp4
diff --git a/test/CodeGen/AArch64/fp16-v8-instructions.ll b/test/CodeGen/AArch64/fp16-v8-instructions.ll

index 9ee2296ace83aa99011b8afca636946f10a3f88e..b75f16053354ee5c48e73cc63072a5e5f1b71385 100644 (file)
--- a/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -188,10 +188,10 @@ define <8 x half> @s_to_h(<8 x float> %a) {
  
  define <8 x half> @d_to_h(<8 x double> %a) {
  ; CHECK-LABEL: d_to_h:
-; CHECK-DAG: ins v{{[0-9]+}}.d
-; CHECK-DAG: ins v{{[0-9]+}}.d
-; CHECK-DAG: ins v{{[0-9]+}}.d
-; CHECK-DAG: ins v{{[0-9]+}}.d
+; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
+; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
+; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
+; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
  ; CHECK-DAG: fcvt h
  ; CHECK-DAG: fcvt h
  ; CHECK-DAG: fcvt h
diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll

index cd6aead1b50357b907e5fcb16fc141ade897f544..3f770600ac5929fe2f7a66cf03de1c91e843af4d 100644 (file)
--- a/test/CodeGen/AArch64/neon-scalar-copy.ll
+++ b/test/CodeGen/AArch64/neon-scalar-copy.ll
@@ -2,7 +2,7 @@
  
  define float @test_dup_sv2S(<2 x float> %v) #0 {
   ; CHECK-LABEL: test_dup_sv2S:
- ; CHECK-NEXT: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+ ; CHECK-NEXT: mov s{{[0-9]+}}, {{v[0-9]+}}.s[1]
   ; CHECK-NEXT: ret
   %tmp1 = extractelement <2 x float> %v, i32 1
   ret float  %tmp1
@@ -19,7 +19,7 @@ define float @test_dup_sv2S_0(<2 x float> %v) #0 {
  
  define float @test_dup_sv4S(<4 x float> %v) #0 {
   ; CHECK-LABEL: test_dup_sv4S:
- ; CHECK-NEXT: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+ ; CHECK-NEXT: mov s{{[0-9]+}}, {{v[0-9]+}}.s[1]
   ; CHECK-NEXT: ret
   %tmp1 = extractelement <4 x float> %v, i32 1
   ret float  %tmp1
@@ -45,7 +45,7 @@ define double @test_dup_dvD(<1 x double> %v) #0 {
  
  define double @test_dup_dv2D(<2 x double> %v) #0 {
   ; CHECK-LABEL: test_dup_dv2D:
- ; CHECK-NEXT: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+ ; CHECK-NEXT: mov d{{[0-9]+}}, {{v[0-9]+}}.d[1]
   ; CHECK-NEXT: ret
   %tmp1 = extractelement <2 x double> %v, i32 1
   ret double  %tmp1
@@ -62,7 +62,7 @@ define double @test_dup_dv2D_0(<2 x double> %v) #0 {
  
  define half @test_dup_hv8H(<8 x half> %v) #0 {
   ; CHECK-LABEL: test_dup_hv8H:
- ; CHECK-NEXT: ins {{v[0-9]+}}.h[0], {{v[0-9]+}}.h[1]
+ ; CHECK-NEXT: mov h{{[0-9]+}}, {{v[0-9]+}}.h[1]
   ; CHECK-NEXT: ret
   %tmp1 = extractelement <8 x half> %v, i32 1
   ret half  %tmp1
author	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Mon, 2 Feb 2015 17:55:57 +0000 (17:55 +0000)
committer	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Mon, 2 Feb 2015 17:55:57 +0000 (17:55 +0000)
lib/Target/AArch64/AArch64InstrInfo.td		patch | blob | history
test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll		patch | blob | history
test/CodeGen/AArch64/arm64-neon-copy.ll		patch | blob | history
test/CodeGen/AArch64/fp16-v8-instructions.ll		patch | blob | history
test/CodeGen/AArch64/neon-scalar-copy.ll		patch | blob | history