From: Chad Rosier Date: Mon, 2 Dec 2013 21:05:16 +0000 (+0000) Subject: [AArch64] Implemented vcopy_lane patterns using scalar DUP instruction. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=d4809bb0e389787a1682d45e01290a3da8d631e3;p=oota-llvm.git [AArch64] Implemented vcopy_lane patterns using scalar DUP instruction. Patch by Ana Pazos! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196151 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 6a339c8b094..581ebae2481 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -5731,28 +5731,13 @@ multiclass NeonI_Scalar_DUP_Elt_pattern; } -multiclass NeonI_SDUP { - def : Pat<(ResTy (GetLow VPR128:$Rn)), - (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>; - def : Pat<(ResTy (GetHigh VPR128:$Rn)), - (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>; -} - -defm : NeonI_SDUP; -defm : NeonI_SDUP; -defm : NeonI_SDUP; -defm : NeonI_SDUP; -defm : NeonI_SDUP; -defm : NeonI_SDUP; - // Patterns for vector extract of FP data using scalar DUP instructions defm : NeonI_Scalar_DUP_Elt_pattern; defm : NeonI_Scalar_DUP_Elt_pattern; -multiclass NeonI_Scalar_DUP_Vec_pattern { @@ -5764,14 +5749,87 @@ multiclass NeonI_Scalar_DUP_Vec_pattern; } -// Patterns for extract subvectors of v1ix data using scalar DUP instructions -defm : NeonI_Scalar_DUP_Vec_pattern; -defm : NeonI_Scalar_DUP_Vec_pattern; -defm : NeonI_Scalar_DUP_Vec_pattern; +// Patterns for extract subvectors of v1ix data using scalar DUP instructions. 
+defm : NeonI_Scalar_DUP_Ext_Vec_pattern; +defm : NeonI_Scalar_DUP_Ext_Vec_pattern; +defm : NeonI_Scalar_DUP_Ext_Vec_pattern; + +multiclass NeonI_Scalar_DUP_Copy_pattern1 { + + def : Pat<(ResTy (vector_insert (ResTy undef), + (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)), + (neon_uimm0_bare:$Imm))), + (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>; + + def : Pat<(ResTy (vector_insert (ResTy undef), + (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)), + (OpNImm:$Imm))), + (ResTy (DUPI + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + OpNImm:$Imm))>; +} + +multiclass NeonI_Scalar_DUP_Copy_pattern2 { + + def : Pat<(ResTy (scalar_to_vector + (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))), + (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>; + + def : Pat<(ResTy (scalar_to_vector + (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))), + (ResTy (DUPI + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + OpNImm:$Imm))>; +} + +// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP +// instructions. 
+defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern2; +defm : NeonI_Scalar_DUP_Copy_pattern2; +defm : NeonI_Scalar_DUP_Copy_pattern2; +defm : NeonI_Scalar_DUP_Copy_pattern2; +defm : NeonI_Scalar_DUP_Copy_pattern2; +defm : NeonI_Scalar_DUP_Copy_pattern2; multiclass NeonI_Scalar_DUP_alias; defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>; defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>; +multiclass NeonI_SDUP { + def : Pat<(ResTy (GetLow VPR128:$Rn)), + (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>; + def : Pat<(ResTy (GetHigh VPR128:$Rn)), + (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>; +} + +defm : NeonI_SDUP; +defm : NeonI_SDUP; +defm : NeonI_SDUP; +defm : NeonI_SDUP; +defm : NeonI_SDUP; +defm : NeonI_SDUP; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -7110,13 +7182,10 @@ def : Pat<(v1i32 (scalar_to_vector GPR32:$src)), def : Pat<(v1i64 (scalar_to_vector GPR64:$src)), (FMOVdx $src)>; -def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))), - (v1f32 FPR32:$Rn)>; -def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))), - (v1f64 FPR64:$Rn)>; - def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))), (FMOVdd $src)>; +def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$src))), + (FMOVss $src)>; def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))), (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll index 59f62374d4f..d433ff595d1 100644 --- a/test/CodeGen/AArch64/neon-scalar-copy.ll +++ b/test/CodeGen/AArch64/neon-scalar-copy.ll @@ -78,3 +78,11 @@ define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) { ret <1 x i64> %shuffle.i } +define <1 x i64> 
@test_vector_copy_dup_dv2D(<1 x i64> %a, <2 x i64> %c) { + ;CHECK: test_vector_copy_dup_dv2D + ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] + %vget_lane = extractelement <2 x i64> %c, i32 1 + %vset_lane = insertelement <1 x i64> undef, i64 %vget_lane, i32 0 + ret <1 x i64> %vset_lane +} +