From: Matt Arsenault Date: Sat, 22 Aug 2015 00:50:41 +0000 (+0000) Subject: AMDGPU: Improve accuracy of instruction rates for some FP instructions X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=e48caeb48f4af56d1bb401951fb9d38496186050;p=oota-llvm.git AMDGPU: Improve accuracy of instruction rates for some FP instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245774 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/CIInstructions.td b/lib/Target/AMDGPU/CIInstructions.td index fd569097ae8..9ec6fd12499 100644 --- a/lib/Target/AMDGPU/CIInstructions.td +++ b/lib/Target/AMDGPU/CIInstructions.td @@ -41,6 +41,7 @@ def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">; let SubtargetPredicate = isCIVI in { +let SchedRW = [WriteDoubleAdd] in { defm V_TRUNC_F64 : VOP1Inst , "v_trunc_f64", VOP_F64_F64, ftrunc >; @@ -53,12 +54,16 @@ defm V_FLOOR_F64 : VOP1Inst , "v_floor_f64", defm V_RNDNE_F64 : VOP1Inst , "v_rndne_f64", VOP_F64_F64, frint >; +} // End SchedRW = [WriteDoubleAdd] + +let SchedRW = [WriteQuarterRate32] in { defm V_LOG_LEGACY_F32 : VOP1Inst , "v_log_legacy_f32", VOP_F32_F32 >; defm V_EXP_LEGACY_F32 : VOP1Inst , "v_exp_legacy_f32", VOP_F32_F32 >; +} // End SchedRW = [WriteQuarterRate32] //===----------------------------------------------------------------------===// // VOP3 Instructions diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 8ad9d53831c..833a19f2d43 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -1286,7 +1286,9 @@ defm V_SQRT_F64 : VOP1Inst , "v_sqrt_f64", VOP_F64_F64, fsqrt >; -} // let SchedRW = [WriteDouble] +} // End SchedRW = [WriteDouble] + +let SchedRW = [WriteQuarterRate32] in { defm V_SIN_F32 : VOP1Inst , "v_sin_f32", VOP_F32_F32, AMDGPUsin @@ -1294,6 +1296,9 @@ defm V_SIN_F32 : VOP1Inst , "v_sin_f32", defm V_COS_F32 : VOP1Inst , "v_cos_f32", VOP_F32_F32, AMDGPUcos >; + +} // End SchedRW = [WriteQuarterRate32] + defm V_NOT_B32 : VOP1Inst , "v_not_b32", VOP_I32_I32>; defm V_BFREV_B32 : VOP1Inst , "v_bfrev_b32", VOP_I32_I32>; defm V_FFBH_U32 : VOP1Inst , "v_ffbh_u32", VOP_I32_I32>; @@ -1302,10 +1307,18 @@ defm V_FFBH_I32 : VOP1Inst , "v_ffbh_i32", VOP_I32_I32>; defm V_FREXP_EXP_I32_F64 : VOP1Inst , "v_frexp_exp_i32_f64", VOP_I32_F64 >; + +let SchedRW = [WriteDoubleAdd] in { defm V_FREXP_MANT_F64 : VOP1Inst , "v_frexp_mant_f64", VOP_F64_F64 >; -defm V_FRACT_F64 : VOP1Inst , "v_fract_f64", VOP_F64_F64>; + +defm V_FRACT_F64 : VOP1Inst , "v_fract_f64", + VOP_F64_F64 +>; +} // End SchedRW = [WriteDoubleAdd] + + defm V_FREXP_EXP_I32_F32 : VOP1Inst , "v_frexp_exp_i32_f32", VOP_I32_F32 >; @@ -1337,7 +1350,7 @@ defm V_RSQ_LEGACY_F32 : VOP1InstSI , "v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy >; -} // End let SchedRW = [WriteQuarterRate32] +} // End SchedRW = [WriteQuarterRate32] let SchedRW = [WriteDouble] in { @@ -1698,15 +1711,15 @@ defm V_DIV_FIXUP_F32 : VOP3Inst < vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup >; -let SchedRW = [WriteDouble] in { +let SchedRW = [WriteDoubleAdd] in { defm V_DIV_FIXUP_F64 : VOP3Inst < vop3<0x160, 0x1df>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup >; -} // let SchedRW = [WriteDouble] +} // End SchedRW = [WriteDouble] -let SchedRW = [WriteDouble] in { +let SchedRW = [WriteDoubleAdd] in { let isCommutable = 1 in { defm V_ADD_F64 : VOP3Inst , "v_add_f64", @@ -1729,7 +1742,7 @@ defm V_LDEXP_F64 : VOP3Inst , "v_ldexp_f64", VOP_F64_F64_I32, AMDGPUldexp >; -} // let SchedRW = [WriteDouble] +} // let SchedRW = [WriteDoubleAdd] let isCommutable = 1, SchedRW = [WriteQuarterRate32] in { @@ -1760,6 +1773,7 @@ defm V_DIV_SCALE_F64 : VOP3b_64 , "v_div_scale_f64", []>; let isCommutable = 1, Uses = [VCC] in { +let SchedRW = [WriteFloatFMA] in { // v_div_fmas_f32: // result = src0 * src1 + src2 // if (vcc) @@ -1768,6 +1782,7 @@ let isCommutable = 1, Uses = [VCC] in { defm V_DIV_FMAS_F32 : VOP3_VCC_Inst , "v_div_fmas_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fmas >; +} let SchedRW = [WriteDouble] in { // v_div_fmas_f64: diff --git a/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/test/CodeGen/AMDGPU/sint_to_fp.f64.ll index 0db7cdc171b..a94ccc32e61 100644 --- a/test/CodeGen/AMDGPU/sint_to_fp.f64.ll +++ b/test/CodeGen/AMDGPU/sint_to_fp.f64.ll @@ -46,9 +46,9 @@ define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) { ; SI-LABEL: @v_sint_to_fp_i64_to_f64 ; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} -; SI: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]] -; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32 -; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]] +; SI-DAG: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]] +; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]] +; SI-DAG: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32 ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) { diff --git a/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/test/CodeGen/AMDGPU/uint_to_fp.f64.ll index 6f608df5e9f..65fe580792a 100644 --- a/test/CodeGen/AMDGPU/uint_to_fp.f64.ll +++ b/test/CodeGen/AMDGPU/uint_to_fp.f64.ll @@ -4,9 +4,9 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; SI-LABEL: {{^}}v_uint_to_fp_i64_to_f64 ; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} -; SI: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]] -; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32 -; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]] +; SI-DAG: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]] +; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]] +; SI-DAG: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32 ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {