From 5200d84bdc1d2962e47b12c0d758fcf8107922db Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Wed, 6 Aug 2014 00:22:39 +0000 Subject: [PATCH] [X86][SchedModel] Fixed some wrong scheduling model found by code inspection. Source: Agner Fog's Instruction tables. Related to git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214940 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrMMX.td | 21 +++++++++----- lib/Target/X86/X86InstrSSE.td | 52 +++++++++++++++++++++++------------ 2 files changed, 48 insertions(+), 25 deletions(-) diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index ecf80a1ac9b..abc244b34a2 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -38,12 +38,17 @@ def MMX_PHADDSUBD : OpndItins< >; } +let Sched = WriteVecLogic in +def MMX_INTALU_ITINS_VECLOGICSCHED : OpndItins< + IIC_MMX_ALU_RR, IIC_MMX_ALU_RM +>; + let Sched = WriteVecIMul in def MMX_PMUL_ITINS : OpndItins< IIC_MMX_PMUL, IIC_MMX_PMUL >; -let Sched = WriteVecALU in { +let Sched = WriteVecIMul in { def MMX_PSADBW_ITINS : OpndItins< IIC_MMX_PSADBW, IIC_MMX_PSADBW >; @@ -167,12 +172,14 @@ multiclass ssse3_palign_mm { def R64irr : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>; + [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>, + Sched<[WriteShuffle]>; def R64irm : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR64:$dst, (IntId VR64:$src1, - (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>; + (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } multiclass sse12_cvt_pint opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -427,13 +434,13 @@ let Constraints = "$src1 = $dst" in // Logical Instructions defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS_VECLOGICSCHED, 1>; defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS_VECLOGICSCHED, 1>; defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, - MMX_INTALU_ITINS, 1>; + MMX_INTALU_ITINS_VECLOGICSCHED, 1>; defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn, - MMX_INTALU_ITINS>; + MMX_INTALU_ITINS_VECLOGICSCHED>; // Shift Instructions defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2bb898e7465..46bf9b8a9f2 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -181,6 +181,7 @@ def SSE_MPSADBW_ITINS : OpndItins< IIC_SSE_MPSADBW_RR, IIC_SSE_MPSADBW_RM >; +let Sched = WriteVecIMul in def SSE_PMULLD_ITINS : OpndItins< IIC_SSE_PMULLD_RR, IIC_SSE_PMULLD_RM >; @@ -218,11 +219,21 @@ def DEFAULT_ITINS_BLENDSCHED : OpndItins< IIC_ALU_NONMEM, IIC_ALU_MEM >; +let Sched = WriteVarBlend in +def DEFAULT_ITINS_VARBLENDSCHED : OpndItins< + IIC_ALU_NONMEM, IIC_ALU_MEM +>; + let Sched = WriteFBlend in def SSE_INTALU_ITINS_FBLEND_P : OpndItins< IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM >; +let Sched = WriteBlend in +def SSE_INTALU_ITINS_BLEND_P : OpndItins< + IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM +>; + //===----------------------------------------------------------------------===// // SSE 1 & 2 Instructions Classes //===----------------------------------------------------------------------===// @@ -7308,7 +7319,7 @@ let Constraints = "$src1 = $dst" in { let Predicates = [HasAVX] in { defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128, - memopv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, + memopv2i64, i128mem, 0, SSE_PMULLD_ITINS>, VEX_4V; defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128, memopv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, @@ -7316,7 +7327,7 @@ let Predicates = [HasAVX] in { } let Predicates = [HasAVX2] in { defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256, - memopv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, + memopv4i64, i256mem, 0, SSE_PMULLD_ITINS>, VEX_4V, VEX_L; defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256, memopv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, @@ -7422,7 +7433,7 @@ let Constraints = "$src1 = $dst" in { 1, SSE_INTALU_ITINS_FBLEND_P>; defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw, VR128, memopv2i64, i128mem, - 1, SSE_INTALU_ITINS_FBLEND_P>; + 1, SSE_INTALU_ITINS_BLEND_P>; defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, VR128, memopv2i64, i128mem, 1, SSE_MPSADBW_ITINS>; @@ -7555,7 +7566,7 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in { !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm0 : SS48I; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } let ExeDomain = SSEPackedDouble in defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem, - int_x86_sse41_blendvpd>; + int_x86_sse41_blendvpd, + DEFAULT_ITINS_FBLENDSCHED>; let ExeDomain = SSEPackedSingle in defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem, - int_x86_sse41_blendvps>; + int_x86_sse41_blendvps, + DEFAULT_ITINS_FBLENDSCHED>; defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem, - int_x86_sse41_pblendvb>; + int_x86_sse41_pblendvb, + DEFAULT_ITINS_VARBLENDSCHED>; // Aliases with the implicit xmm0 argument def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", @@ -8763,14 +8777,14 @@ let Predicates = [HasAVX] in { // multiclass avx2_perm opc, string OpcodeStr, PatFrag mem_frag, - ValueType OpVT> { + ValueType OpVT, X86FoldableSchedWrite Sched> { def Yrr : AVX28I, - Sched<[WriteFShuffle256]>, VEX_4V, VEX_L; + Sched<[Sched]>, VEX_4V, VEX_L; def Yrm : AVX28I opc, string OpcodeStr, PatFrag mem_frag, [(set VR256:$dst, (OpVT (X86VPermv VR256:$src1, (bitconvert (mem_frag addr:$src2)))))]>, - Sched<[WriteFShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L; + Sched<[Sched.Folded, ReadAfterLd]>, VEX_4V, VEX_L; } -defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32>; +defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256>; let ExeDomain = SSEPackedSingle in -defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32>; +defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256>; multiclass avx2_perm_imm opc, string OpcodeStr, PatFrag mem_frag, - ValueType OpVT> { + ValueType OpVT, X86FoldableSchedWrite Sched> { def Yri : AVX2AIi8, - Sched<[WriteShuffle256]>, VEX, VEX_L; + Sched<[Sched]>, VEX, VEX_L; def Ymi : AVX2AIi8 opc, string OpcodeStr, PatFrag mem_frag, [(set VR256:$dst, (OpVT (X86VPermi (mem_frag addr:$src1), (i8 imm:$src2))))]>, - Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX, VEX_L; + Sched<[Sched.Folded, ReadAfterLd]>, VEX, VEX_L; } -defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64>, VEX_W; +defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64, + WriteShuffle256>, VEX_W; let ExeDomain = SSEPackedDouble in -defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64>, VEX_W; +defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64, + WriteFShuffle256>, VEX_W; //===----------------------------------------------------------------------===// // VPERM2I128 - Permute Floating-Point Values in 128-bit chunks -- 2.34.1