From: Bob Wilson Date: Fri, 20 Jan 2012 20:59:56 +0000 (+0000) Subject: ARM vector any_extends need to be selected to vmovl. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=1e9ccd68d40c3d79b2f25f471553914d73bdee58;p=oota-llvm.git ARM vector any_extends need to be selected to vmovl. We have patterns for vector sext and zext operations but were missing anyext. Without those patterns, codegen will fail when the selection DAG has any_extend nodes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148568 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 781d1583408..a2df5bde880 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5024,6 +5024,9 @@ defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, // VMOVL : Vector Lengthening Move defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; +def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>; +def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>; +def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>; // Vector Conversions. diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll index be95657915c..0c2387960b4 100644 --- a/test/CodeGen/ARM/vmov.ll +++ b/test/CodeGen/ARM/vmov.ll @@ -381,3 +381,20 @@ entry: store <4 x float> %b, <4 x float> *%p ret void } + +; Vector any_extends must be selected as either vmovl.u or vmovl.s. 
+; rdar://10723651
+define void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp {
+entry:
+;CHECK: any_extend
+;CHECK: vmovl
+  %and.i186 = zext <4 x i1> %x to <4 x i32>            ; widen the i1 lanes of %x to i32
+  %add.i185 = sub <4 x i32> %and.i186, %y              ; note: value is named add.* but the operation is sub
+  %sub.i = sub <4 x i32> %add.i185, zeroinitializer    ; subtract an all-zero vector
+  %add.i = add <4 x i32> %sub.i, zeroinitializer       ; add an all-zero vector
+  %vmovn.i = trunc <4 x i32> %add.i to <4 x i16>       ; narrow each lane from i32 to i16
+  tail call void @llvm.arm.neon.vst1.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2) ; store through an undef pointer; i32 2 is presumably the alignment operand -- confirm against the intrinsic definition
+  unreachable                                          ; the block ends here without a ret
+}
+
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind