From: Bob Wilson <bob.wilson@apple.com>
Date: Fri, 20 Jan 2012 20:59:56 +0000 (+0000)
Subject: ARM vector any_extends need to be selected to vmovl.  <rdar://problem/10723651>
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=1e9ccd68d40c3d79b2f25f471553914d73bdee58;p=oota-llvm.git

ARM vector any_extends need to be selected to vmovl.  <rdar://problem/10723651>

We have patterns for vector sext and zext operations but were missing
anyext.  Without those patterns, codegen will fail when the selection DAG
has any_extend nodes.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148568 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 781d1583408..a2df5bde880 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -5024,6 +5024,9 @@ defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
 //   VMOVL    : Vector Lengthening Move
 defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
 defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
+def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
+def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
+def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
 
 // Vector Conversions.
 
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index be95657915c..0c2387960b4 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -381,3 +381,20 @@ entry:
   store <4 x float> %b, <4 x float> *%p
   ret void
 }
+
+; Vector any_extends must be selected as either vmovl.u or vmovl.s.
+; rdar://10723651
+define void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp {
+entry:
+;CHECK: any_extend
+;CHECK: vmovl
+  %and.i186 = zext <4 x i1> %x to <4 x i32>
+  %add.i185 = sub <4 x i32> %and.i186, %y
+  %sub.i = sub <4 x i32> %add.i185, zeroinitializer
+  %add.i = add <4 x i32> %sub.i, zeroinitializer
+  %vmovn.i = trunc <4 x i32> %add.i to <4 x i16>
+  tail call void @llvm.arm.neon.vst1.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2)
+  unreachable
+}
+
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind