%tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
+
+; CHECK: vabdls_8xi8
+define <8 x i16> @vabdls_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+; CHECK: vabdl.s8 q8, d16, d17 @ encoding: [0xa1,0x07,0xc0,0xf2]
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+; CHECK: vabdls_4xi16
+define <4 x i32> @vabdls_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+; CHECK: vabdl.s16 q8, d16, d17 @ encoding: [0xa1,0x07,0xd0,0xf2]
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+; CHECK: vabdls_2xi32
+define <2 x i64> @vabdls_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+; CHECK: vabdl.s32 q8, d16, d17 @ encoding: [0xa1,0x07,0xe0,0xf2]
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+; CHECK: vabdlu_8xi8
+define <8 x i16> @vabdlu_8xi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+; CHECK: vabdl.u8 q8, d16, d17 @ encoding: [0xa1,0x07,0xc0,0xf3]
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
+}
+
+; CHECK: vabdlu_4xi16
+define <4 x i32> @vabdlu_4xi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+; CHECK: vabdl.u16 q8, d16, d17 @ encoding: [0xa1,0x07,0xd0,0xf3]
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+; CHECK: vabdlu_2xi3
+define <2 x i64> @vabdlu_2xi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}