From: Bob Wilson Date: Fri, 7 Aug 2009 23:53:05 +0000 (+0000) Subject: Add new intrinsics for Neon VTRN, VZIP and VUZP operations. Modeling these X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=9e699cc72639f25b20e73f51ab06ab7a0884ea62;p=oota-llvm.git Add new intrinsics for Neon VTRN, VZIP and VUZP operations. Modeling these as vector shuffles did not work out well. Shuffles that produce double-wide vectors accurately represent the operation but make it hard to do anything with the results. I considered splitting them up into 2 shuffles, one to write each register separately, but there doesn't seem to be a good way to reunite them for codegen. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78437 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td index efe5bff4371..e9a7e2e91da 100644 --- a/include/llvm/IntrinsicsARM.td +++ b/include/llvm/IntrinsicsARM.td @@ -66,6 +66,12 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.". LLVMTruncatedElementVectorType<0>, LLVMTruncatedElementVectorType<0>], [IntrNoMem]>; + class Neon_2Result_Intrinsic + : Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>], + [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; + class Neon_2Result_Float_Intrinsic + : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>], + [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; class Neon_CvtFxToFP_Intrinsic : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; class Neon_CvtFPToFx_Intrinsic @@ -288,6 +294,18 @@ def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic; def int_arm_neon_vmovls : Neon_1Arg_Long_Intrinsic; def int_arm_neon_vmovlu : Neon_1Arg_Long_Intrinsic; +// Vector Transpose. +def int_arm_neon_vtrni : Neon_2Result_Intrinsic; +def int_arm_neon_vtrnf : Neon_2Result_Float_Intrinsic; + +// Vector Interleave (vzip). +def int_arm_neon_vzipi : Neon_2Result_Intrinsic; +def int_arm_neon_vzipf : Neon_2Result_Float_Intrinsic; + +// Vector Deinterleave (vuzp). +def int_arm_neon_vuzpi : Neon_2Result_Intrinsic; +def int_arm_neon_vuzpf : Neon_2Result_Float_Intrinsic; + let TargetPrefix = "arm" in { // De-interleaving vector loads from N-element structures.