From 513a00db789a07510f7ce715e92e53343a9d8d53 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 13 Dec 2013 17:56:44 +0000 Subject: [PATCH] [AArch64] Simplify the Neon Scalar3Same patterns for floating-point reciprocal step, floating-point reciprocal square root step, floating-point absolute difference, and integer/floating-point compare instructions. Also, move the scalar general arithmetic operation patterns closer to similar code. No functional change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197250 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrNEON.td | 156 +++++++++++-------------- 1 file changed, 65 insertions(+), 91 deletions(-) diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index ade7cbf7ff0..e9b1298a24c 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -4155,19 +4155,12 @@ multiclass Neon_Scalar3Same_BHSD_size_patterns { def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), (INSTB FPR8:$Rn, FPR8:$Rm)>; - def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), (INSTH FPR16:$Rn, FPR16:$Rm)>; - def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), (INSTS FPR32:$Rn, FPR32:$Rm)>; } -class Neon_Scalar3Same_cmp_D_size_patterns - : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), - (INSTD FPR64:$Rn, FPR64:$Rm)>; - multiclass Neon_Scalar3Same_HS_size_patterns { @@ -4177,33 +4170,13 @@ multiclass Neon_Scalar3Same_HS_size_patterns; } -multiclass Neon_Scalar3Same_fabd_SD_size_patterns { - def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; - def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))), - (INSTD FPR64:$Rn, FPR64:$Rm)>; -} - multiclass Neon_Scalar3Same_SD_size_patterns { - def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))), + ValueType SResTy, ValueType STy, + Instruction INSTS, ValueType DResTy, + ValueType DTy, Instruction INSTD> { + def : Pat<(SResTy (opnode (STy FPR32:$Rn), (STy FPR32:$Rm))), (INSTS FPR32:$Rn, FPR32:$Rm)>; - def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))), - (INSTD FPR64:$Rn, FPR64:$Rm)>; - def : Pat<(v1f64 (opnodeV (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (INSTD FPR64:$Rn, FPR64:$Rm)>; -} - -multiclass Neon_Scalar3Same_cmp_SD_size_patterns { - def : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; - def : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))), + def : Pat<(DResTy (opnode (DTy FPR64:$Rn), (DTy FPR64:$Rm))), (INSTD FPR64:$Rn, FPR64:$Rm)>; } @@ -4875,15 +4848,17 @@ defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>; // Scalar Floating-point Reciprocal Step defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>; -defm : Neon_Scalar3Same_SD_size_patterns; +defm : Neon_Scalar3Same_SD_size_patterns; +def : Pat<(v1f64 (int_arm_neon_vrecps (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FRECPSddd FPR64:$Rn, FPR64:$Rm)>; // Scalar Floating-point Reciprocal Square Root Step defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>; -defm : Neon_Scalar3Same_SD_size_patterns; +defm : Neon_Scalar3Same_SD_size_patterns; +def : Pat<(v1f64 (int_arm_neon_vrsqrts (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FRSQRTSddd FPR64:$Rn, FPR64:$Rm)>; def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>; // Patterns to match llvm.aarch64.* intrinsic for @@ -5092,7 +5067,7 @@ def : Neon_ScalarFloatRound_pattern; // Scalar Compare Bitwise Equal def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">; -def : Neon_Scalar3Same_cmp_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; class Neon_Scalar3Same_cmp_D_size_v1_patterns; // Scalar Compare Signed Greather Than Or Equal def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">; -def : Neon_Scalar3Same_cmp_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; def : Neon_Scalar3Same_cmp_D_size_v1_patterns; // Scalar Compare Unsigned Higher Or Same def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">; -def : Neon_Scalar3Same_cmp_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; def : Neon_Scalar3Same_cmp_D_size_v1_patterns; // Scalar Compare Unsigned Higher def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">; -def : Neon_Scalar3Same_cmp_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; def : Neon_Scalar3Same_cmp_D_size_v1_patterns; // Scalar Compare Signed Greater Than def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">; -def : Neon_Scalar3Same_cmp_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; def : Neon_Scalar3Same_cmp_D_size_v1_patterns; // Scalar Compare Bitwise Test Bits def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">; -def : Neon_Scalar3Same_cmp_D_size_patterns; -def : Neon_Scalar3Same_cmp_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; // Scalar Compare Bitwise Equal To Zero def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">; @@ -5161,8 +5136,8 @@ def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns; // Scalar Floating-point Compare Mask Equal defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">; -defm : Neon_Scalar3Same_cmp_SD_size_patterns; +defm : Neon_Scalar3Same_SD_size_patterns; def : Neon_Scalar3Same_cmp_V1_D_size_patterns; // Scalar Floating-point Compare Mask Equal To Zero @@ -5174,8 +5149,8 @@ def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpz32:$FPImm), SETEQ)), // Scalar Floating-point Compare Mask Greater Than Or Equal defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">; -defm : Neon_Scalar3Same_cmp_SD_size_patterns; +defm : Neon_Scalar3Same_SD_size_patterns; def : Neon_Scalar3Same_cmp_V1_D_size_patterns; // Scalar Floating-point Compare Mask Greater Than Or Equal To Zero @@ -5185,8 +5160,8 @@ defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns; -defm : Neon_Scalar3Same_cmp_SD_size_patterns; +defm : Neon_Scalar3Same_SD_size_patterns; def : Neon_Scalar3Same_cmp_V1_D_size_patterns; // Scalar Floating-point Compare Mask Greather Than Zero @@ -5206,22 +5181,22 @@ defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns; -defm : Neon_Scalar3Same_cmp_SD_size_patterns; +defm : Neon_Scalar3Same_SD_size_patterns; def : Pat<(v1i64 (int_aarch64_neon_vcage (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FACGEddd FPR64:$Rn, FPR64:$Rm)>; // Scalar Floating-point Absolute Compare Mask Greater Than defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">; -defm : Neon_Scalar3Same_cmp_SD_size_patterns; +defm : Neon_Scalar3Same_SD_size_patterns; def : Pat<(v1i64 (int_aarch64_neon_vcagt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FACGTddd FPR64:$Rn, FPR64:$Rm)>; -// Scakar Floating-point Absolute Difference +// Scalar Floating-point Absolute Difference defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">; -defm : Neon_Scalar3Same_fabd_SD_size_patterns; +defm : Neon_Scalar3Same_SD_size_patterns; // Scalar Absolute Value defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">; @@ -5481,7 +5456,6 @@ defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns; - // Scalar Floating Point fused multiply-add (scalar, by element) def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla", 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { @@ -5766,38 +5740,6 @@ defm : Neon_ScalarXIndexedElem_MLAL_Patterns; -// Scalar general arithmetic operation -class Neon_Scalar_GeneralMath2D_pattern - : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>; - -class Neon_Scalar_GeneralMath3D_pattern - : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (INST FPR64:$Rn, FPR64:$Rm)>; - -class Neon_Scalar_GeneralMath4D_pattern - : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), - (v1f64 FPR64:$Ra))), - (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; - -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; -def : Neon_Scalar_GeneralMath3D_pattern; - -def : Neon_Scalar_GeneralMath2D_pattern; -def : Neon_Scalar_GeneralMath2D_pattern; - -def : Neon_Scalar_GeneralMath4D_pattern; -def : Neon_Scalar_GeneralMath4D_pattern; - // Scalar Signed saturating doubling multiply returning // high half (scalar, by element) def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh", @@ -5884,6 +5826,38 @@ defm : Neon_ScalarXIndexedElem_MUL_Patterns; +// Scalar general arithmetic operation +class Neon_Scalar_GeneralMath2D_pattern + : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>; + +class Neon_Scalar_GeneralMath3D_pattern + : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (INST FPR64:$Rn, FPR64:$Rm)>; + +class Neon_Scalar_GeneralMath4D_pattern + : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), + (v1f64 FPR64:$Ra))), + (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; + +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; + +def : Neon_Scalar_GeneralMath2D_pattern; +def : Neon_Scalar_GeneralMath2D_pattern; + +def : Neon_Scalar_GeneralMath4D_pattern; +def : Neon_Scalar_GeneralMath4D_pattern; + // Scalar Copy - DUP element to scalar class NeonI_Scalar_DUP