From: Bob Wilson
Date: Fri, 9 Oct 2009 05:14:48 +0000 (+0000)
Subject: Convert more NEON tests to use FileCheck.
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=0305dd745e1dc1b1a6108d3a1ca2d48430a31497;p=oota-llvm.git

Convert more NEON tests to use FileCheck.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@83616 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/ARM/vset_lane.ll b/test/CodeGen/ARM/vset_lane.ll
index bb20dede36d..65d246db35f 100644
--- a/test/CodeGen/ARM/vset_lane.ll
+++ b/test/CodeGen/ARM/vset_lane.ll
@@ -1,46 +1,57 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vmov\\.8} %t | count 2
-; RUN: grep {vmov\\.16} %t | count 2
-; RUN: grep {vmov\\.32} %t | count 2
-; RUN: grep {fcpys} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
+;CHECK: vset_lane8:
+;CHECK: vmov.8
%tmp1 = load <8 x i8>* %A
%tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1
ret <8 x i8> %tmp2
}
define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
+;CHECK: vset_lane16:
+;CHECK: vmov.16
%tmp1 = load <4 x i16>* %A
%tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1
ret <4 x i16> %tmp2
}
define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
+;CHECK: vset_lane32:
+;CHECK: vmov.32
%tmp1 = load <2 x i32>* %A
%tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1
ret <2 x i32> %tmp2
}
define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
+;CHECK: vsetQ_lane8:
+;CHECK: vmov.8
%tmp1 = load <16 x i8>* %A
%tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1
ret <16 x i8> %tmp2
}
define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
+;CHECK: vsetQ_lane16:
+;CHECK: vmov.16
%tmp1 = load <8 x i16>* %A
%tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1
ret <8 x i16> %tmp2
}
define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
+;CHECK: vsetQ_lane32:
+;CHECK: vmov.32
%tmp1 = load <4 x i32>* %A
%tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
ret <4 x i32> %tmp2
}
define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
+;CHECK: test_vset_lanef32:
+;CHECK: fcpys
+;CHECK: fcpys
entry:
%0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
ret <2 x float> %0
diff --git a/test/CodeGen/ARM/vshift.ll b/test/CodeGen/ARM/vshift.ll
index 346d7e28f47..996858f4cf5 100644
--- a/test/CodeGen/ARM/vshift.ll
+++ b/test/CodeGen/ARM/vshift.ll
@@ -1,30 +1,8 @@
-; RUN: llc < %s -march=arm -mattr=+neon > %t
-; RUN: grep {vshl\\.s8} %t | count 2
-; RUN: grep {vshl\\.s16} %t | count 2
-; RUN: grep {vshl\\.s32} %t | count 2
-; RUN: grep {vshl\\.s64} %t | count 2
-; RUN: grep {vshl\\.u8} %t | count 4
-; RUN: grep {vshl\\.u16} %t | count 4
-; RUN: grep {vshl\\.u32} %t | count 4
-; RUN: grep {vshl\\.u64} %t | count 4
-; RUN: grep {vshl\\.i8} %t | count 2
-; RUN: grep {vshl\\.i16} %t | count 2
-; RUN: grep {vshl\\.i32} %t | count 2
-; RUN: grep {vshl\\.i64} %t | count 2
-; RUN: grep {vshr\\.u8} %t | count 2
-; RUN: grep {vshr\\.u16} %t | count 2
-; RUN: grep {vshr\\.u32} %t | count 2
-; RUN: grep {vshr\\.u64} %t | count 2
-; RUN: grep {vshr\\.s8} %t | count 2
-; RUN: grep {vshr\\.s16} %t | count 2
-; RUN: grep {vshr\\.s32} %t | count 2
-; RUN: grep {vshr\\.s64} %t | count 2
-; RUN: grep {vneg\\.s8} %t | count 4
-; RUN: grep {vneg\\.s16} %t | count 4
-; RUN: grep {vneg\\.s32} %t | count 4
-; RUN: grep {vsub\\.i64} %t | count 4
+; RUN: llc < %s -march=arm
-mattr=+neon | FileCheck %s define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vshls8: +;CHECK: vshl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = shl <8 x i8> %tmp1, %tmp2 @@ -32,6 +10,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vshls16: +;CHECK: vshl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = shl <4 x i16> %tmp1, %tmp2 @@ -39,6 +19,8 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vshls32: +;CHECK: vshl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = shl <2 x i32> %tmp1, %tmp2 @@ -46,6 +28,8 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vshls64: +;CHECK: vshl.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = shl <1 x i64> %tmp1, %tmp2 @@ -53,30 +37,40 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <8 x i8> @vshli8(<8 x i8>* %A) nounwind { +;CHECK: vshli8: +;CHECK: vshl.i8 %tmp1 = load <8 x i8>* %A %tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > ret <8 x i8> %tmp2 } define <4 x i16> @vshli16(<4 x i16>* %A) nounwind { +;CHECK: vshli16: +;CHECK: vshl.i16 %tmp1 = load <4 x i16>* %A %tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 > ret <4 x i16> %tmp2 } define <2 x i32> @vshli32(<2 x i32>* %A) nounwind { +;CHECK: vshli32: +;CHECK: vshl.i32 %tmp1 = load <2 x i32>* %A %tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 > ret <2 x i32> %tmp2 } define <1 x i64> @vshli64(<1 x i64>* %A) nounwind { +;CHECK: vshli64: +;CHECK: vshl.i64 %tmp1 = load <1 x i64>* %A %tmp2 = shl <1 x i64> %tmp1, < i64 63 > ret <1 x i64> %tmp2 } define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vshlQs8: +;CHECK: vshl.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = shl <16 x i8> %tmp1, %tmp2 @@ -84,6 +78,8 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vshlQs16: +;CHECK: vshl.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = shl <8 x i16> %tmp1, %tmp2 @@ -91,6 +87,8 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vshlQs32: +;CHECK: vshl.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = shl <4 x i32> %tmp1, %tmp2 @@ -98,6 +96,8 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vshlQs64: +;CHECK: vshl.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = shl <2 x i64> %tmp1, %tmp2 @@ -105,30 +105,41 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind { +;CHECK: vshlQi8: +;CHECK: vshl.i8 %tmp1 = load <16 x i8>* %A %tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > ret <16 x i8> %tmp2 } define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind { +;CHECK: vshlQi16: +;CHECK: vshl.i16 %tmp1 = load <8 x i16>* %A %tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 > ret <8 x i16> %tmp2 } define <4 x i32> 
@vshlQi32(<4 x i32>* %A) nounwind { +;CHECK: vshlQi32: +;CHECK: vshl.i32 %tmp1 = load <4 x i32>* %A %tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 > ret <4 x i32> %tmp2 } define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind { +;CHECK: vshlQi64: +;CHECK: vshl.i64 %tmp1 = load <2 x i64>* %A %tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 > ret <2 x i64> %tmp2 } define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vlshru8: +;CHECK: vneg.s8 +;CHECK: vshl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = lshr <8 x i8> %tmp1, %tmp2 @@ -136,6 +147,9 @@ define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vlshru16: +;CHECK: vneg.s16 +;CHECK: vshl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = lshr <4 x i16> %tmp1, %tmp2 @@ -143,6 +157,9 @@ define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vlshru32: +;CHECK: vneg.s32 +;CHECK: vshl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = lshr <2 x i32> %tmp1, %tmp2 @@ -150,6 +167,9 @@ define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vlshru64: +;CHECK: vsub.i64 +;CHECK: vshl.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = lshr <1 x i64> %tmp1, %tmp2 @@ -157,30 +177,41 @@ define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind { +;CHECK: vlshri8: +;CHECK: vshr.u8 %tmp1 = load <8 x i8>* %A %tmp2 = lshr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > ret <8 x i8> %tmp2 } define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind { +;CHECK: vlshri16: +;CHECK: vshr.u16 %tmp1 = load <4 x i16>* %A %tmp2 = lshr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 > ret <4 x i16> %tmp2 } define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind { +;CHECK: vlshri32: +;CHECK: vshr.u32 %tmp1 = load <2 x i32>* %A %tmp2 = lshr <2 x i32> %tmp1, < i32 32, i32 32 > ret <2 x i32> %tmp2 } define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind { +;CHECK: vlshri64: +;CHECK: vshr.u64 %tmp1 = load <1 x i64>* %A %tmp2 = lshr <1 x i64> %tmp1, < i64 64 > ret <1 x i64> %tmp2 } define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vlshrQu8: +;CHECK: vneg.s8 +;CHECK: vshl.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = lshr <16 x i8> %tmp1, %tmp2 @@ -188,6 +219,9 @@ define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vlshrQu16: +;CHECK: vneg.s16 +;CHECK: vshl.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = lshr <8 x i16> %tmp1, %tmp2 @@ -195,6 +229,9 @@ define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vlshrQu32: +;CHECK: vneg.s32 +;CHECK: vshl.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = lshr <4 x i32> %tmp1, %tmp2 @@ -202,6 +239,9 @@ define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vlshrQu64: +;CHECK: vsub.i64 +;CHECK: vshl.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = lshr <2 x i64> %tmp1, %tmp2 @@ -209,30 +249,41 @@ define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x 
i64>* %B) nounwind { } define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind { +;CHECK: vlshrQi8: +;CHECK: vshr.u8 %tmp1 = load <16 x i8>* %A %tmp2 = lshr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > ret <16 x i8> %tmp2 } define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind { +;CHECK: vlshrQi16: +;CHECK: vshr.u16 %tmp1 = load <8 x i16>* %A %tmp2 = lshr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > ret <8 x i16> %tmp2 } define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind { +;CHECK: vlshrQi32: +;CHECK: vshr.u32 %tmp1 = load <4 x i32>* %A %tmp2 = lshr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 > ret <4 x i32> %tmp2 } define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind { +;CHECK: vlshrQi64: +;CHECK: vshr.u64 %tmp1 = load <2 x i64>* %A %tmp2 = lshr <2 x i64> %tmp1, < i64 64, i64 64 > ret <2 x i64> %tmp2 } define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vashrs8: +;CHECK: vneg.s8 +;CHECK: vshl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = ashr <8 x i8> %tmp1, %tmp2 @@ -240,6 +291,9 @@ define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vashrs16: +;CHECK: vneg.s16 +;CHECK: vshl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = ashr <4 x i16> %tmp1, %tmp2 @@ -247,6 +301,9 @@ define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vashrs32: +;CHECK: vneg.s32 +;CHECK: vshl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = ashr <2 x i32> %tmp1, %tmp2 @@ -254,6 +311,9 @@ define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vashrs64: +;CHECK: vsub.i64 +;CHECK: vshl.s64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = ashr <1 x i64> %tmp1, %tmp2 @@ -261,30 +321,41 @@ define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <8 x i8> @vashri8(<8 x i8>* %A) nounwind { +;CHECK: vashri8: +;CHECK: vshr.s8 %tmp1 = load <8 x i8>* %A %tmp2 = ashr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > ret <8 x i8> %tmp2 } define <4 x i16> @vashri16(<4 x i16>* %A) nounwind { +;CHECK: vashri16: +;CHECK: vshr.s16 %tmp1 = load <4 x i16>* %A %tmp2 = ashr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 > ret <4 x i16> %tmp2 } define <2 x i32> @vashri32(<2 x i32>* %A) nounwind { +;CHECK: vashri32: +;CHECK: vshr.s32 %tmp1 = load <2 x i32>* %A %tmp2 = ashr <2 x i32> %tmp1, < i32 32, i32 32 > ret <2 x i32> %tmp2 } define <1 x i64> @vashri64(<1 x i64>* %A) nounwind { +;CHECK: vashri64: +;CHECK: vshr.s64 %tmp1 = load <1 x i64>* %A %tmp2 = ashr <1 x i64> %tmp1, < i64 64 > ret <1 x i64> %tmp2 } define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vashrQs8: +;CHECK: vneg.s8 +;CHECK: vshl.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = ashr <16 x i8> %tmp1, %tmp2 @@ -292,6 +363,9 @@ define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vashrQs16: +;CHECK: vneg.s16 +;CHECK: vshl.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = ashr <8 x i16> %tmp1, %tmp2 @@ -299,6 +373,9 @@ define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vashrQs32(<4 x i32>* 
%A, <4 x i32>* %B) nounwind { +;CHECK: vashrQs32: +;CHECK: vneg.s32 +;CHECK: vshl.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = ashr <4 x i32> %tmp1, %tmp2 @@ -306,6 +383,9 @@ define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vashrQs64: +;CHECK: vsub.i64 +;CHECK: vshl.s64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = ashr <2 x i64> %tmp1, %tmp2 @@ -313,24 +393,32 @@ define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind { +;CHECK: vashrQi8: +;CHECK: vshr.s8 %tmp1 = load <16 x i8>* %A %tmp2 = ashr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > ret <16 x i8> %tmp2 } define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind { +;CHECK: vashrQi16: +;CHECK: vshr.s16 %tmp1 = load <8 x i16>* %A %tmp2 = ashr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > ret <8 x i16> %tmp2 } define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind { +;CHECK: vashrQi32: +;CHECK: vshr.s32 %tmp1 = load <4 x i32>* %A %tmp2 = ashr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 > ret <4 x i32> %tmp2 } define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind { +;CHECK: vashrQi64: +;CHECK: vshr.s64 %tmp1 = load <2 x i64>* %A %tmp2 = ashr <2 x i64> %tmp1, < i64 64, i64 64 > ret <2 x i64> %tmp2 diff --git a/test/CodeGen/ARM/vshiftins.ll b/test/CodeGen/ARM/vshiftins.ll index 251efdc167e..3a4f8574e39 100644 --- a/test/CodeGen/ARM/vshiftins.ll +++ b/test/CodeGen/ARM/vshiftins.ll @@ -1,14 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vsli\\.8} %t | count 2 -; RUN: grep {vsli\\.16} %t | count 2 -; RUN: grep {vsli\\.32} %t | count 2 -; RUN: grep {vsli\\.64} %t | count 2 -; RUN: grep {vsri\\.8} %t | count 2 -; RUN: grep {vsri\\.16} %t | count 2 -; RUN: grep {vsri\\.32} %t | count 2 -; RUN: grep {vsri\\.64} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsli8: +;CHECK: vsli.8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) @@ -16,6 +10,8 @@ define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsli16: +;CHECK: vsli.16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) @@ -23,6 +19,8 @@ define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsli32: +;CHECK: vsli.32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 31, i32 31 >) @@ -30,6 +28,8 @@ define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vsli64: +;CHECK: vsli.64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 63 >) @@ -37,6 +37,8 @@ define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> 
@vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vsliQ8: +;CHECK: vsli.8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) @@ -44,6 +46,8 @@ define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsliQ16: +;CHECK: vsli.16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) @@ -51,6 +55,8 @@ define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsliQ32: +;CHECK: vsli.32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) @@ -58,6 +64,8 @@ define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsliQ64: +;CHECK: vsli.64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 63, i64 63 >) @@ -65,6 +73,8 @@ define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsri8: +;CHECK: vsri.8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) @@ -72,6 +82,8 @@ define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsri16: +;CHECK: vsri.16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) @@ -79,6 +91,8 @@ define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsri32: +;CHECK: vsri.32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >) @@ -86,6 +100,8 @@ define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vsri64: +;CHECK: vsri.64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 -64 >) @@ -93,6 +109,8 @@ define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vsriQ8: +;CHECK: vsri.8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) @@ -100,6 +118,8 @@ define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) 
nounwind { +;CHECK: vsriQ16: +;CHECK: vsri.16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) @@ -107,6 +127,8 @@ define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsriQ32: +;CHECK: vsri.32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) @@ -114,6 +136,8 @@ define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vsriQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsriQ64: +;CHECK: vsri.64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >) diff --git a/test/CodeGen/ARM/vshl.ll b/test/CodeGen/ARM/vshl.ll index 773b184a173..b0d0315d582 100644 --- a/test/CodeGen/ARM/vshl.ll +++ b/test/CodeGen/ARM/vshl.ll @@ -1,26 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vshl\\.s8} %t | count 2 -; RUN: grep {vshl\\.s16} %t | count 2 -; RUN: grep {vshl\\.s32} %t | count 2 -; RUN: grep {vshl\\.s64} %t | count 2 -; RUN: grep {vshl\\.u8} %t | count 2 -; RUN: grep {vshl\\.u16} %t | count 2 -; RUN: grep {vshl\\.u32} %t | count 2 -; RUN: grep {vshl\\.u64} %t | count 2 -; RUN: grep {vshl\\.i8} %t | count 2 -; RUN: grep {vshl\\.i16} %t | count 2 -; RUN: grep {vshl\\.i32} %t | count 2 -; RUN: grep {vshl\\.i64} %t | count 2 -; RUN: grep {vshr\\.s8} %t | count 2 -; RUN: grep {vshr\\.s16} %t | count 2 -; RUN: grep {vshr\\.s32} %t | count 2 -; RUN: grep {vshr\\.s64} %t | count 2 -; RUN: grep {vshr\\.u8} %t | count 2 -; RUN: grep {vshr\\.u16} %t | count 2 -; RUN: grep {vshr\\.u32} %t | count 2 -; RUN: grep {vshr\\.u64} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vshls8: +;CHECK: vshl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -28,6 +10,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vshls16: +;CHECK: vshl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -35,6 +19,8 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vshls32: +;CHECK: vshl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -42,6 +28,8 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vshls64: +;CHECK: vshl.s64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -49,6 +37,8 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vshlu8: +;CHECK: vshl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x 
i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -56,6 +46,8 @@ define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vshlu16: +;CHECK: vshl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -63,6 +55,8 @@ define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vshlu32: +;CHECK: vshl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -70,6 +64,8 @@ define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vshlu64: +;CHECK: vshl.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -77,6 +73,8 @@ define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vshlQs8: +;CHECK: vshl.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -84,6 +82,8 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vshlQs16: +;CHECK: vshl.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -91,6 +91,8 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vshlQs32: +;CHECK: vshl.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -98,6 +100,8 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vshlQs64: +;CHECK: vshl.s64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) @@ -105,6 +109,8 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vshlQu8: +;CHECK: vshl.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -112,6 +118,8 @@ define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vshlQu16: +;CHECK: vshl.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -119,6 +127,8 @@ define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vshlQu32: +;CHECK: vshl.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -126,6 +136,8 @@ define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: 
vshlQu64: +;CHECK: vshl.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) @@ -136,48 +148,64 @@ define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ; Test a mix of both signed and unsigned intrinsics. define <8 x i8> @vshli8(<8 x i8>* %A) nounwind { +;CHECK: vshli8: +;CHECK: vshl.i8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i8> %tmp2 } define <4 x i16> @vshli16(<4 x i16>* %A) nounwind { +;CHECK: vshli16: +;CHECK: vshl.i16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i16> %tmp2 } define <2 x i32> @vshli32(<2 x i32>* %A) nounwind { +;CHECK: vshli32: +;CHECK: vshl.i32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i32> %tmp2 } define <1 x i64> @vshli64(<1 x i64>* %A) nounwind { +;CHECK: vshli64: +;CHECK: vshl.i64 %tmp1 = load <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) ret <1 x i64> %tmp2 } define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind { +;CHECK: vshlQi8: +;CHECK: vshl.i8 %tmp1 = load <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <16 x i8> %tmp2 } define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind { +;CHECK: vshlQi16: +;CHECK: vshl.i16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) ret <8 x i16> %tmp2 } define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind { +;CHECK: vshlQi32: +;CHECK: vshl.i32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) ret <4 x i32> %tmp2 } define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind { +;CHECK: vshlQi64: +;CHECK: vshl.i64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) ret <2 x i64> %tmp2 @@ -186,96 +214,128 @@ define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind { ; Right shift by immediate: define <8 x i8> @vshrs8(<8 x i8>* %A) nounwind { +;CHECK: vshrs8: +;CHECK: vshr.s8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <8 x i8> %tmp2 } define <4 x i16> @vshrs16(<4 x i16>* %A) nounwind { +;CHECK: vshrs16: +;CHECK: vshr.s16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) ret <4 x i16> %tmp2 } define <2 x i32> @vshrs32(<2 x i32>* %A) nounwind { +;CHECK: vshrs32: +;CHECK: vshr.s32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >) ret <2 x i32> %tmp2 } define <1 x i64> @vshrs64(<1 x i64>* %A) nounwind { +;CHECK: vshrs64: +;CHECK: vshr.s64 %tmp1 = load <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >) ret <1 x i64> %tmp2 } define <8 x i8> @vshru8(<8 x i8>* %A) nounwind { +;CHECK: vshru8: +;CHECK: vshr.u8 
%tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <8 x i8> %tmp2 } define <4 x i16> @vshru16(<4 x i16>* %A) nounwind { +;CHECK: vshru16: +;CHECK: vshr.u16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) ret <4 x i16> %tmp2 } define <2 x i32> @vshru32(<2 x i32>* %A) nounwind { +;CHECK: vshru32: +;CHECK: vshr.u32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >) ret <2 x i32> %tmp2 } define <1 x i64> @vshru64(<1 x i64>* %A) nounwind { +;CHECK: vshru64: +;CHECK: vshr.u64 %tmp1 = load <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >) ret <1 x i64> %tmp2 } define <16 x i8> @vshrQs8(<16 x i8>* %A) nounwind { +;CHECK: vshrQs8: +;CHECK: vshr.s8 %tmp1 = load <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <16 x i8> %tmp2 } define <8 x i16> @vshrQs16(<8 x i16>* %A) nounwind { +;CHECK: vshrQs16: +;CHECK: vshr.s16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) ret <8 x i16> %tmp2 } define <4 x i32> @vshrQs32(<4 x i32>* %A) nounwind { +;CHECK: vshrQs32: +;CHECK: vshr.s32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) ret <4 x i32> %tmp2 } define <2 x i64> @vshrQs64(<2 x i64>* %A) nounwind { +;CHECK: vshrQs64: +;CHECK: vshr.s64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >) ret <2 x i64> %tmp2 } define <16 x i8> @vshrQu8(<16 x i8>* %A) nounwind { +;CHECK: vshrQu8: +;CHECK: vshr.u8 %tmp1 = load <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <16 x i8> %tmp2 } define <8 x i16> @vshrQu16(<8 x i16>* %A) nounwind { +;CHECK: vshrQu16: +;CHECK: vshr.u16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) ret <8 x i16> %tmp2 } define <4 x i32> @vshrQu32(<4 x i32>* %A) nounwind { +;CHECK: vshrQu32: +;CHECK: vshr.u32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) ret <4 x i32> %tmp2 } define <2 x i64> @vshrQu64(<2 x i64>* %A) nounwind { +;CHECK: vshrQu64: +;CHECK: vshr.u64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >) ret <2 x i64> %tmp2 diff --git a/test/CodeGen/ARM/vshll.ll b/test/CodeGen/ARM/vshll.ll index 5407662ef0e..8e85b98f49b 100644 --- a/test/CodeGen/ARM/vshll.ll +++ b/test/CodeGen/ARM/vshll.ll @@ -1,45 +1,48 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vshll\\.s8} %t | count 1 -; RUN: grep {vshll\\.s16} %t | count 1 -; RUN: grep {vshll\\.s32} %t | count 1 -; RUN: grep {vshll\\.u8} %t | count 1 -; RUN: grep 
{vshll\\.u16} %t | count 1 -; RUN: grep {vshll\\.u32} %t | count 1 -; RUN: grep {vshll\\.i8} %t | count 1 -; RUN: grep {vshll\\.i16} %t | count 1 -; RUN: grep {vshll\\.i32} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind { +;CHECK: vshlls8: +;CHECK: vshll.s8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i16> %tmp2 } define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind { +;CHECK: vshlls16: +;CHECK: vshll.s16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftls.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i32> %tmp2 } define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind { +;CHECK: vshlls32: +;CHECK: vshll.s32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i64> %tmp2 } define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind { +;CHECK: vshllu8: +;CHECK: vshll.u8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftlu.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i16> %tmp2 } define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind { +;CHECK: vshllu16: +;CHECK: vshll.u16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i32> %tmp2 } define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind { +;CHECK: vshllu32: +;CHECK: vshll.u32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftlu.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i64> %tmp2 @@ -48,18 +51,24 @@ define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind { ; The following tests use the maximum shift count, so the signedness is ; irrelevant. Test both signed and unsigned versions. 
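; Illustration (hypothetical, not from the original file): with the maximum
; shift count every original bit ends up in the upper half of the widened
; element, so sign- and zero-extension give the same bits. For an i8 element x
; widened to i16 and shifted by 8:
;   (sext i8 x to i16) << 8  ==  (zext i8 x to i16) << 8  ==  x in the high byte, 0 in the low byte
; which is why the signed and unsigned intrinsics with the maximum count both
; select the same vshll.i<size> form checked below.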
define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind { +;CHECK: vshlli8: +;CHECK: vshll.i8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >) ret <8 x i16> %tmp2 } define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind { +;CHECK: vshlli16: +;CHECK: vshll.i16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 16, i16 16, i16 16, i16 16 >) ret <4 x i32> %tmp2 } define <2 x i64> @vshlli32(<2 x i32>* %A) nounwind { +;CHECK: vshlli32: +;CHECK: vshll.i32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 32, i32 32 >) ret <2 x i64> %tmp2 diff --git a/test/CodeGen/ARM/vshrn.ll b/test/CodeGen/ARM/vshrn.ll index 26834e7b26f..01324fa8f7a 100644 --- a/test/CodeGen/ARM/vshrn.ll +++ b/test/CodeGen/ARM/vshrn.ll @@ -1,21 +1,24 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vshrn\\.i16} %t | count 1 -; RUN: grep {vshrn\\.i32} %t | count 1 -; RUN: grep {vshrn\\.i64} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind { +;CHECK: vshrns8: +;CHECK: vshrn.i16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind { +;CHECK: vshrns16: +;CHECK: vshrn.i32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind { +;CHECK: vshrns32: +;CHECK: vshrn.i64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 diff --git a/test/CodeGen/ARM/vsra.ll b/test/CodeGen/ARM/vsra.ll index 10cefc24a21..acb672d00fa 100644 --- a/test/CodeGen/ARM/vsra.ll +++ b/test/CodeGen/ARM/vsra.ll @@ -1,22 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vsra\\.s8} %t | count 2 -; RUN: grep {vsra\\.s16} %t | count 2 -; RUN: grep {vsra\\.s32} %t | count 2 -; RUN: grep {vsra\\.s64} %t | count 2 -; RUN: grep {vsra\\.u8} %t | count 2 -; RUN: grep {vsra\\.u16} %t | count 2 -; RUN: grep {vsra\\.u32} %t | count 2 -; RUN: grep {vsra\\.u64} %t | count 2 -; RUN: grep {vrsra\\.s8} %t | count 2 -; RUN: grep {vrsra\\.s16} %t | count 2 -; RUN: grep {vrsra\\.s32} %t | count 2 -; RUN: grep {vrsra\\.s64} %t | count 2 -; RUN: grep {vrsra\\.u8} %t | count 2 -; RUN: grep {vrsra\\.u16} %t | count 2 -; RUN: grep {vrsra\\.u32} %t | count 2 -; RUN: grep {vrsra\\.u64} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsras8: +;CHECK: vsra.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = ashr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > @@ -25,6 +11,8 @@ define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsras16: +;CHECK: vsra.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = ashr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 > @@ -33,6 +21,8 @@ define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind { 
+;CHECK: vsras32: +;CHECK: vsra.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = ashr <2 x i32> %tmp2, < i32 32, i32 32 > @@ -41,6 +31,8 @@ define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vsras64: +;CHECK: vsra.s64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = ashr <1 x i64> %tmp2, < i64 64 > @@ -49,6 +41,8 @@ define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vsraQs8: +;CHECK: vsra.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = ashr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > @@ -57,6 +51,8 @@ define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsraQs16: +;CHECK: vsra.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = ashr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > @@ -65,6 +61,8 @@ define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsraQs32: +;CHECK: vsra.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = ashr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 > @@ -73,6 +71,8 @@ define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsraQs64: +;CHECK: vsra.s64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = ashr <2 x i64> %tmp2, < i64 64, i64 64 > @@ -81,6 +81,8 @@ define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsrau8: +;CHECK: vsra.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = lshr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > @@ -89,6 +91,8 @@ define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsrau16: +;CHECK: vsra.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = lshr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 > @@ -97,6 +101,8 @@ define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsrau32: +;CHECK: vsra.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = lshr <2 x i32> %tmp2, < i32 32, i32 32 > @@ -105,6 +111,8 @@ define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vsrau64: +;CHECK: vsra.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = lshr <1 x i64> %tmp2, < i64 64 > @@ -113,6 +121,8 @@ define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vsraQu8: +;CHECK: vsra.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = lshr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > @@ -121,6 +131,8 @@ define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsraQu16: +;CHECK: vsra.u16 %tmp1 = load <8 x i16>* %A %tmp2 = 
load <8 x i16>* %B %tmp3 = lshr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > @@ -129,6 +141,8 @@ define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsraQu32: +;CHECK: vsra.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = lshr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 > @@ -137,6 +151,8 @@ define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsraQu64: +;CHECK: vsra.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = lshr <2 x i64> %tmp2, < i64 64, i64 64 > @@ -145,6 +161,8 @@ define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrsras8: +;CHECK: vrsra.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) @@ -153,6 +171,8 @@ define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vrsras16: +;CHECK: vrsra.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) @@ -161,6 +181,8 @@ define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vrsras32: +;CHECK: vrsra.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >) @@ -169,6 +191,8 @@ define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vrsras64: +;CHECK: vrsra.s64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >) @@ -177,6 +201,8 @@ define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrsrau8: +;CHECK: vrsra.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) @@ -185,6 +211,8 @@ define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vrsrau16: +;CHECK: vrsra.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) @@ -193,6 +221,8 @@ define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vrsrau32: +;CHECK: vrsra.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >) @@ -201,6 +231,8 @@ define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vrsrau64: +;CHECK: vrsra.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> 
@llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >) @@ -209,6 +241,8 @@ define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vrsraQs8: +;CHECK: vrsra.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) @@ -217,6 +251,8 @@ define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vrsraQs16: +;CHECK: vrsra.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) @@ -225,6 +261,8 @@ define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vrsraQs32: +;CHECK: vrsra.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) @@ -233,6 +271,8 @@ define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vrsraQs64: +;CHECK: vrsra.s64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >) @@ -241,6 +281,8 @@ define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vrsraQu8: +;CHECK: vrsra.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) @@ -249,6 +291,8 @@ define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vrsraQu16: +;CHECK: vrsra.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) @@ -257,6 +301,8 @@ define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vrsraQu32: +;CHECK: vrsra.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) @@ -265,6 +311,8 @@ define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vrsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vrsraQu64: +;CHECK: vrsra.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >) diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll index 8419a1bf257..588044264ab 100644 --- a/test/CodeGen/ARM/vsub.ll +++ b/test/CodeGen/ARM/vsub.ll @@ -1,11 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vsub\\.i8} %t | count 2 -; RUN: grep {vsub\\.i16} %t | count 2 -; RUN: grep 
{vsub\\.i32} %t | count 2 -; RUN: grep {vsub\\.i64} %t | count 2 -; RUN: grep {vsub\\.f32} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsubi8: +;CHECK: vsub.i8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = sub <8 x i8> %tmp1, %tmp2 @@ -13,6 +10,8 @@ define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsubi16: +;CHECK: vsub.i16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = sub <4 x i16> %tmp1, %tmp2 @@ -20,6 +19,8 @@ define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsubi32: +;CHECK: vsub.i32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = sub <2 x i32> %tmp1, %tmp2 @@ -27,6 +28,8 @@ define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vsubi64: +;CHECK: vsub.i64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = sub <1 x i64> %tmp1, %tmp2 @@ -34,6 +37,8 @@ define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vsubf32: +;CHECK: vsub.f32 %tmp1 = load <2 x float>* %A %tmp2 = load <2 x float>* %B %tmp3 = sub <2 x float> %tmp1, %tmp2 @@ -41,6 +46,8 @@ define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind { } define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vsubQi8: +;CHECK: vsub.i8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = sub <16 x i8> %tmp1, %tmp2 @@ -48,6 +55,8 @@ define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsubQi16: +;CHECK: vsub.i16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = sub <8 x i16> %tmp1, %tmp2 @@ -55,6 +64,8 @@ define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsubQi32: +;CHECK: vsub.i32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = sub <4 x i32> %tmp1, %tmp2 @@ -62,6 +73,8 @@ define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsubQi64: +;CHECK: vsub.i64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = sub <2 x i64> %tmp1, %tmp2 @@ -69,6 +82,8 @@ define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vsubQf32: +;CHECK: vsub.f32 %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B %tmp3 = sub <4 x float> %tmp1, %tmp2 diff --git a/test/CodeGen/ARM/vsubhn.ll b/test/CodeGen/ARM/vsubhn.ll index f1eafa80d9c..93645ef788b 100644 --- a/test/CodeGen/ARM/vsubhn.ll +++ b/test/CodeGen/ARM/vsubhn.ll @@ -1,9 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vsubhn\\.i16} %t | count 1 -; RUN: grep {vsubhn\\.i32} %t | count 1 -; RUN: grep {vsubhn\\.i64} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsubhni16: +;CHECK: vsubhn.i16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ 
-11,6 +10,8 @@ define <8 x i8> @vsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i16> @vsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsubhni32: +;CHECK: vsubhn.i32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -18,6 +19,8 @@ define <4 x i16> @vsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i32> @vsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsubhni64: +;CHECK: vsubhn.i64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) diff --git a/test/CodeGen/ARM/vsubl.ll b/test/CodeGen/ARM/vsubl.ll index 6cd867fbd20..9a9bcdb6a03 100644 --- a/test/CodeGen/ARM/vsubl.ll +++ b/test/CodeGen/ARM/vsubl.ll @@ -1,12 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vsubl\\.s8} %t | count 1 -; RUN: grep {vsubl\\.s16} %t | count 1 -; RUN: grep {vsubl\\.s32} %t | count 1 -; RUN: grep {vsubl\\.u8} %t | count 1 -; RUN: grep {vsubl\\.u16} %t | count 1 -; RUN: grep {vsubl\\.u32} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsubls8: +;CHECK: vsubl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -14,6 +10,8 @@ define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsubls16: +;CHECK: vsubl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -21,6 +19,8 @@ define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsubls32: +;CHECK: vsubl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -28,6 +28,8 @@ define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsublu8: +;CHECK: vsubl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -35,6 +37,8 @@ define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsublu16: +;CHECK: vsubl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -42,6 +46,8 @@ define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsublu32: +;CHECK: vsubl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) diff --git a/test/CodeGen/ARM/vsubw.ll b/test/CodeGen/ARM/vsubw.ll index d83b19ce6ff..e6b6b5c9f83 100644 --- a/test/CodeGen/ARM/vsubw.ll +++ b/test/CodeGen/ARM/vsubw.ll @@ -1,12 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+neon > %t -; RUN: grep {vsubw\\.s8} %t | count 1 -; RUN: grep {vsubw\\.s16} %t | count 1 -; RUN: grep {vsubw\\.s32} %t | count 1 -; RUN: grep {vsubw\\.u8} %t | count 1 -; RUN: grep {vsubw\\.u16} %t | count 1 -; RUN: grep {vsubw\\.u32} %t | count 1 +; 
RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsubws8: +;CHECK: vsubw.s8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) @@ -14,6 +10,8 @@ define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind { } define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsubws16: +;CHECK: vsubw.s16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2) @@ -21,6 +19,8 @@ define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { } define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsubws32: +;CHECK: vsubw.s32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2) @@ -28,6 +28,8 @@ define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { } define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsubwu8: +;CHECK: vsubw.u8 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) @@ -35,6 +37,8 @@ define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { } define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsubwu16: +;CHECK: vsubw.u16 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2) @@ -42,6 +46,8 @@ define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { } define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsubwu32: +;CHECK: vsubw.u32 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
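The conversion pattern is the same in every file touched by this commit: the grep/count RUN pipelines are replaced by a single FileCheck invocation, and the expected NEON mnemonics move into ;CHECK comments placed next to the functions that should emit them. For reference, a minimal sketch of the pattern follows; the file and the function name vsub32_example are hypothetical, mirroring the vsubi32 test above:

; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Hypothetical example, not one of the converted tests: a <2 x i32> subtraction
; should lower to the NEON vsub.i32 instruction, which FileCheck verifies.
define <2 x i32> @vsub32_example(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vsub32_example:
;CHECK: vsub.i32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = sub <2 x i32> %tmp1, %tmp2
  ret <2 x i32> %tmp3
}

Unlike the old global grep counts, which had to lump together unrelated matches (for example the four vshl.u8 occurrences in vshift.ll produced by both the shl and lshr tests), the CHECK lines tie each expected instruction to the specific function that should produce it.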