From ed839120c4a9e268c52f14b6443589456ec2b008 Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Tue, 1 Apr 2014 12:22:37 +0000
Subject: [PATCH] ARM64: add intrinsic for pmull (p64 x p64 = p128) operations.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205302 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/IR/IntrinsicsARM64.td |  8 ++++++--
 lib/Target/ARM64/ARM64InstrInfo.td |  7 +++++++
 test/CodeGen/ARM64/vmul.ll         | 18 ++++++++++++++++++
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/include/llvm/IR/IntrinsicsARM64.td b/include/llvm/IR/IntrinsicsARM64.td
index b280d005d0c..d7f307e9ff6 100644
--- a/include/llvm/IR/IntrinsicsARM64.td
+++ b/include/llvm/IR/IntrinsicsARM64.td
@@ -74,8 +74,7 @@ let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
                 [IntrNoMem]>;
   class AdvSIMD_2VectorArg_Long_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
-                [LLVMTruncatedType<0>,
-                 LLVMTruncatedType<0>],
+                [LLVMTruncatedType<0>, LLVMTruncatedType<0>],
                 [IntrNoMem]>;
   class AdvSIMD_2VectorArg_Wide_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
@@ -178,6 +177,11 @@ let Properties = [IntrNoMem] in {
   def int_arm64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic;
   def int_arm64_neon_pmull : AdvSIMD_2VectorArg_Long_Intrinsic;
 
+  // A 64-bit polynomial multiply really produces an i128, which is not a legal
+  // type, so the intrinsic returns the 128-bit result as a v16i8 instead.
+  def int_arm64_neon_pmull64 :
+        Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+
   // Vector Extending Multiply
   def int_arm64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic;
 
diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td
index c9a714b8dda..ee066a38f4f 100644
--- a/lib/Target/ARM64/ARM64InstrInfo.td
+++ b/lib/Target/ARM64/ARM64InstrInfo.td
@@ -2726,6 +2726,13 @@ defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
 defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
                  BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>;
 
+// 64-bit pmull: direct operands use PMULLv1i64, lane-1 extracts PMULLv2i64.
+def : Pat<(int_arm64_neon_pmull64 V64:$Rn, V64:$Rm),
+          (PMULLv1i64 V64:$Rn, V64:$Rm)>;
+def : Pat<(int_arm64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)),
+                                  (vector_extract (v2i64 V128:$Rm), (i64 1))),
+          (PMULLv2i64 V128:$Rn, V128:$Rm)>;
+
 // CodeGen patterns for addhn and subhn instructions, which can actually be
 // written in LLVM IR without too much difficulty.
 
diff --git a/test/CodeGen/ARM64/vmul.ll b/test/CodeGen/ARM64/vmul.ll
index 433c09d37f8..3ef0a76e204 100644
--- a/test/CodeGen/ARM64/vmul.ll
+++ b/test/CodeGen/ARM64/vmul.ll
@@ -1983,3 +1983,21 @@ define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind {
   %tmp5 = call i64 @llvm.arm64.neon.sqsub.i64(i64 %C, i64 %tmp4)
   ret i64 %tmp5
 }
+
+define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind {
+; CHECK-LABEL: test_pmull_64:
+; CHECK: pmull.1q
+  %val = call <16 x i8> @llvm.arm64.neon.pmull64(i64 %l, i64 %r)
+  ret <16 x i8> %val
+}
+
+define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind {
+; CHECK-LABEL: test_pmull_high_64:
+; CHECK: pmull2.1q
+  %l_hi = extractelement <2 x i64> %l, i32 1
+  %r_hi = extractelement <2 x i64> %r, i32 1
+  %val = call <16 x i8> @llvm.arm64.neon.pmull64(i64 %l_hi, i64 %r_hi)
+  ret <16 x i8> %val
+}
+
+declare <16 x i8> @llvm.arm64.neon.pmull64(i64, i64)
-- 
2.34.1