From: Charlie Turner <charlie.turner@arm.com> Date: Tue, 17 Nov 2015 13:21:35 +0000 (+0000) Subject: [ARM] Match VABDL from log2 shuffles. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=377cc21860b673e2a34fde23e8871578bf201e36;p=oota-llvm.git [ARM] Match VABDL from log2 shuffles. Differential Revision: http://reviews.llvm.org/D14664 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253334 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 5e9e3876fe3..af0552a0664 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5009,6 +5009,29 @@ defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, "vabdl", "u", uabsdiff, zext, 1>; +/* abd_shr(in1, in2, shift): NEONvshrs applied to d = (zext in1) - (zext in2) with the i32 immediate shift. The patterns below always pass (widened element bits - 1), so s is presumably the per-lane sign mask of d -- assumes NEONvshrs is an arithmetic right shift; confirm. */ def abd_shr : + PatFrag<(ops node:$in1, node:$in2, node:$shift), + (NEONvshrs (sub (zext node:$in1), + (zext node:$in2)), (i32 $shift))>; +/* v8i8 operands: xor(s, add(d, s)) with d = zext(opA) - zext(opB) as v8i16 and s = abd_shr(opA, opB, 15); both xor operands are v8i16 values bitconverted to v4i32. Selected to VABDLuv8i16. */ +def : Pat<(xor (v4i32 (bitconvert (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15)))), + (v4i32 (bitconvert (v8i16 (add (sub (zext (v8i8 DPR:$opA)), + (zext (v8i8 DPR:$opB))), + (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15))))))), + (VABDLuv8i16 DPR:$opA, DPR:$opB)>; +/* v4i16 operands: same xor(s, add(d, s)) shape computed directly in v4i32 (no bitconvert) with shift 31. Selected to VABDLuv4i32. */ +def : Pat<(xor (v4i32 (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)), + (v4i32 (add (sub (zext (v4i16 DPR:$opA)), + (zext (v4i16 DPR:$opB))), + (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)))), + (VABDLuv4i32 DPR:$opA, DPR:$opB)>; +/* v2i32 operands: same shape in v2i64 with shift 63; both xor operands are bitconverted to v4i32. Selected to VABDLuv2i64. */ +def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))), + (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)), + (zext (v2i32 DPR:$opB))), + (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))), + (VABDLuv2i64 DPR:$opA, DPR:$opB)>; + // VABA : Vector Absolute Difference and Accumulate defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, "vaba", "s", sabsdiff, add>; diff --git a/test/CodeGen/ARM/neon_vabs.ll b/test/CodeGen/ARM/neon_vabs.ll index 
7a02512198b..d32e7b78879 100644 --- a/test/CodeGen/ARM/neon_vabs.ll +++ b/test/CodeGen/ARM/neon_vabs.ll @@ -89,3 +89,41 @@ define <2 x i32> @test10(<2 x i32> %a) nounwind { %abs = select <2 x i1> %b, <2 x i32> %tmp1neg, <2 x i32> %a ret <2 x i32> %abs } + +;; Check that absdiff patterns as emitted by log2 shuffles, i.e. xor(s, add(s, d)) with d = zext(a) - zext(b) and s = ashr(d, element bits - 1), are +;; matched by VABDL: test11, test12 and test13 expect vabdl.u16, vabdl.u8 and vabdl.u32 respectively. + +define <4 x i32> @test11(<4 x i16> %a, <4 x i16> %b) nounwind { +; CHECK-LABEL: test11: +; CHECK: vabdl.u16 q + %zext1 = zext <4 x i16> %a to <4 x i32> + %zext2 = zext <4 x i16> %b to <4 x i32> + %diff = sub <4 x i32> %zext1, %zext2 + %shift1 = ashr <4 x i32> %diff, <i32 31, i32 31, i32 31, i32 31> + %add1 = add <4 x i32> %shift1, %diff + %res = xor <4 x i32> %shift1, %add1 + ret <4 x i32> %res +} +define <8 x i16> @test12(<8 x i8> %a, <8 x i8> %b) nounwind { +; CHECK-LABEL: test12: +; CHECK: vabdl.u8 q + %zext1 = zext <8 x i8> %a to <8 x i16> + %zext2 = zext <8 x i8> %b to <8 x i16> + %diff = sub <8 x i16> %zext1, %zext2 + %shift1 = ashr <8 x i16> %diff,<i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + %add1 = add <8 x i16> %shift1, %diff + %res = xor <8 x i16> %shift1, %add1 + ret <8 x i16> %res +} + +define <2 x i64> @test13(<2 x i32> %a, <2 x i32> %b) nounwind { +; CHECK-LABEL: test13: +; CHECK: vabdl.u32 q + %zext1 = zext <2 x i32> %a to <2 x i64> + %zext2 = zext <2 x i32> %b to <2 x i64> + %diff = sub <2 x i64> %zext1, %zext2 + %shift1 = ashr <2 x i64> %diff,<i64 63, i64 63> + %add1 = add <2 x i64> %shift1, %diff + %res = xor <2 x i64> %shift1, %add1 + ret <2 x i64> %res +}