From: Matthew Simpson
Date: Mon, 21 Dec 2015 18:31:25 +0000 (+0000)
Subject: [AArch64] Add additional extract-extend patterns for smov
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=0ce5d69ee3fb6415ed625805b50ddedc2ee6964f;p=oota-llvm.git

[AArch64] Add additional extract-extend patterns for smov

This patch adds to the target description two additional patterns for
matching extract-extend operations to SMOV. The patterns catch the
v16i8-to-i64 and v8i16-to-i64 cases. The existing patterns miss these
cases because the extracted elements must first be legalized to i32,
resulting in any_extend nodes.

This was originally implemented as a DAG combine (r255895), but was
reverted due to failing out-of-tree tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256176 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 70a1f849f1a..d02bc9ff394 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3806,6 +3806,13 @@ def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
 def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
           (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
 
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
+                                                        VectorIndexB:$idx)))), i8),
+          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
+                                                        VectorIndexH:$idx)))), i16),
+          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
+
 // Extracting i8 or i16 elements will have the zero-extend transformed to
 // an 'and' mask by type legalization since neither i8 nor i16 are legal types
 // for AArch64. Match these patterns here since UMOV already zeroes out the high
diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll
index b74a40626ce..83b1cac70f5 100644
--- a/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -320,21 +320,20 @@ define i32 @smovw8h(<8 x i16> %tmp1) {
   ret i32 %tmp5
 }
 
-define i32 @smovx16b(<16 x i8> %tmp1) {
+define i64 @smovx16b(<16 x i8> %tmp1) {
 ; CHECK-LABEL: smovx16b:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
   %tmp3 = extractelement <16 x i8> %tmp1, i32 8
-  %tmp4 = sext i8 %tmp3 to i32
-  %tmp5 = add i32 %tmp4, %tmp4
-  ret i32 %tmp5
+  %tmp4 = sext i8 %tmp3 to i64
+  ret i64 %tmp4
 }
 
-define i32 @smovx8h(<8 x i16> %tmp1) {
+define i64 @smovx8h(<8 x i16> %tmp1) {
 ; CHECK-LABEL: smovx8h:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
-  %tmp4 = sext i16 %tmp3 to i32
-  ret i32 %tmp4
+  %tmp4 = sext i16 %tmp3 to i64
+  ret i64 %tmp4
 }
 
 define i64 @smovx4s(<4 x i32> %tmp1) {
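
For reference, the kind of source code that exercises the new patterns can be
sketched with NEON intrinsics. This is a hypothetical C snippet, not part of
the commit; with these patterns applied, each extract-plus-sign-extend below
should lower to a single smov to a 64-bit register instead of requiring a
separate scalar sign-extend:

  #include <arm_neon.h>
  #include <stdint.h>

  /* v16i8-to-i64 case: extract lane 8 and sign-extend to 64 bits.
     Expected codegen with this patch: smov x0, v0.b[8] */
  int64_t extract_sext_b(int8x16_t v) {
      return (int64_t)vgetq_lane_s8(v, 8);
  }

  /* v8i16-to-i64 case: extract lane 2 and sign-extend to 64 bits.
     Expected codegen with this patch: smov x0, v0.h[2] */
  int64_t extract_sext_h(int16x8_t v) {
      return (int64_t)vgetq_lane_s16(v, 2);
  }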