From 1eb67a4f84d49d88454e2b6142d30e81c839209f Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 21 Mar 2014 19:34:41 +0000 Subject: [PATCH] [AArch64] Add SchedRW lists to NEON instructions. Previously, only regular AArch64 instructions were annotated with SchedRW lists. This patch does the same for NEON enabling these instructions to be scheduled by the MIScheduler. Additionally, store operations are now modeled and a few SchedRW lists were updated for bug fixes (e.g. multiple def operands). Reviewers: apazos, mcrosier, atrick Patch by Dave Estes ! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204505 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrInfo.td | 129 +++-- lib/Target/AArch64/AArch64InstrNEON.td | 603 +++++++++++++++------- lib/Target/AArch64/AArch64Schedule.td | 8 + lib/Target/AArch64/AArch64ScheduleA53.td | 18 +- test/CodeGen/AArch64/misched-basic-A53.ll | 31 +- 5 files changed, 542 insertions(+), 247 deletions(-) diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 3dc66a1f238..7d7a641a2e3 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -463,7 +463,7 @@ defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD, (outs GPR32:$Rd)>; -let Rd = 0b11111, isCompare = 1 in { +let SchedRW = [WriteCMP, ReadCMP, ReadCMP], Rd = 0b11111, isCompare = 1 in { defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV, (outs), extends_to_i64>, addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV, (outs)>; @@ -689,7 +689,7 @@ multiclass addsubimm_varieties shift, [(set NZCV, (A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))], NoItinerary>, - Sched<[WriteALU, ReadALU]> { + Sched<[WriteCMP, ReadCMP]> { let Rd = 0b11111; let Defs = [NZCV]; let isCompare = 1; @@ -1086,7 +1086,7 @@ def BFMwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, 
- Sched<[WriteALU, ReadALU]> { + Sched<[WriteALU, ReadALU, ReadALU]> { let DecoderMethod = "DecodeBitfieldInstruction"; let Constraints = "$src = $Rd"; } @@ -1095,7 +1095,7 @@ def BFMxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { + Sched<[WriteALU, ReadALU, ReadALU]> { let DecoderMethod = "DecodeBitfieldInstruction"; let Constraints = "$src = $Rd"; } @@ -1295,7 +1295,7 @@ defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>; def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS), "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { + Sched<[WriteALU, ReadALU, ReadALU]> { // As above, no disassembler allowed. let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; @@ -1304,7 +1304,7 @@ def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { + Sched<[WriteALU, ReadALU, ReadALU]> { // As above, no disassembler allowed. let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; @@ -1407,7 +1407,7 @@ defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">; def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { + Sched<[WriteALU, ReadALU, ReadALU]> { // As above, no disassembler allowed. 
let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; @@ -1416,7 +1416,7 @@ def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { + Sched<[WriteALU, ReadALU, ReadALU]> { // As above, no disassembler allowed. let isAsmParserOnly = 1; let Constraints = "$src = $Rd"; @@ -1560,7 +1560,8 @@ class A64I_condcmpregImpl (outs), (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond), !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"), - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteCMP, ReadCMP, ReadCMP]> { let Defs = [NZCV]; } @@ -1608,7 +1609,7 @@ multiclass A64I_condselSizes op2, string asmop, !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), [(set i32:$Rd, (select i32:$Rn, i32:$Rm))], NoItinerary>, - Sched<[WriteCMP, ReadCMP]>; + Sched<[WriteCMP, ReadCMP, ReadCMP]>; def xxxc : A64I_condsel<0b1, op, 0b0, op2, @@ -1617,7 +1618,7 @@ multiclass A64I_condselSizes op2, string asmop, !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"), [(set i64:$Rd, (select i64:$Rn, i64:$Rm))], NoItinerary>, - Sched<[WriteCMP, ReadCMP]>; + Sched<[WriteCMP, ReadCMP, ReadCMP]>; } } @@ -1797,7 +1798,8 @@ multiclass dp_2src_crc { def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0, !strconcat(asmop, "x\t$Rd, $Rn, $Rm"), (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteALU, ReadALU, ReadALU]>; } multiclass dp_2src_zext opcode, string asmop, SDPatternOperator op> { @@ -2630,7 +2632,7 @@ let mayLoad = 1 in { (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19), "prfm\t$Rt, $Imm19", [], NoItinerary>, - Sched<[WriteLd]>; + Sched<[WriteLd, ReadLd]>; } //===----------------------------------------------------------------------===// @@ -2685,19 +2687,23 @@ class A64I_SRexs_impl size, bits<3> opcode, string asm, dag outs, multiclass A64I_SRex opcode, 
string prefix> { def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"), (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"), (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [],NoItinerary>; + [],NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; def _word: A64I_SRexs_impl<0b10, opcode, asmstr, (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; def _dword: A64I_SRexs_impl<0b11, opcode, asmstr, (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; } defm STXR : A64I_SRex<"stxr", 0b000, "STXR">; @@ -2792,22 +2798,26 @@ multiclass A64I_SLex opcode, string prefix> { def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"), (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), [(atomic_store_release_8 i64:$Rn, i32:$Rt)], - NoItinerary>; + NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"), (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), [(atomic_store_release_16 i64:$Rn, i32:$Rt)], - NoItinerary>; + NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; def _word: A64I_SLexs_impl<0b10, opcode, asmstr, (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn), [(atomic_store_release_32 i64:$Rn, i32:$Rt)], - NoItinerary>; + NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; def _dword: A64I_SLexs_impl<0b11, opcode, asmstr, (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn), [(atomic_store_release_64 i64:$Rn, i64:$Rt)], - NoItinerary>; + NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]>; } defm STLR : A64I_SLex<"stlr", 0b101, "STLR">; @@ -2832,12 +2842,14 @@ multiclass A64I_SPex opcode> { def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs), (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2, GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteSt, ReadSt, 
ReadSt, ReadSt]>; def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs), (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2, GPR64xsp0:$Rn), - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; } defm STXP : A64I_SPex<"stxp", 0b010>; @@ -2865,13 +2877,13 @@ multiclass A64I_LPex opcode> { (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64xsp0:$Rn), [], NoItinerary>, - Sched<[WriteLd]>; + Sched<[WriteLd, WriteLd, ReadLd]>; def _dword: A64I_LPexs_impl<0b11, opcode, asmstr, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64xsp0:$Rn), [], NoItinerary>, - Sched<[WriteLd]>; + Sched<[WriteLd, WriteLd, ReadLd]>; } defm LDXP : A64I_LPex<"ldxp", 0b010>; @@ -3085,7 +3097,8 @@ multiclass A64I_LDRSTR_unsigned size, bit v, def _STR : A64I_LSunsigimm { + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]> { let mayStore = 1; } def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]", @@ -3126,13 +3139,15 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext), "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; def _Xm_RegOffset_STR : A64I_LSregoff; + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; } def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]", (!cast(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn, @@ -3142,7 +3157,8 @@ multiclass A64I_LDRSTR_unsigned size, bit v, def _STUR : A64I_LSunalimm { + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]> { let mayStore = 1; } def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]", @@ -3163,7 +3179,8 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (outs GPR64xsp:$Rn_wb), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]> { let Constraints = "$Rn = $Rn_wb"; let mayStore = 1; @@ -3176,7 +3193,7 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (ins GPR64xsp:$Rn, 
simm9:$SImm9), "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3187,7 +3204,8 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (outs GPR64xsp:$Rn_wb), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9), "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt]> { let Constraints = "$Rn = $Rn_wb"; let mayStore = 1; @@ -3200,7 +3218,7 @@ multiclass A64I_LDRSTR_unsigned size, bit v, (ins GPR64xsp:$Rn, simm9:$SImm9), "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3340,7 +3358,7 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3350,7 +3368,7 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3361,7 +3379,7 @@ multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3371,7 +3389,7 @@ multiclass A64I_LDR_signed size, string 
asmopcode, AddrParams params, (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; } @@ -3431,7 +3449,7 @@ def LDRSWx_PostInd (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrsw\t$Rt, [$Rn], $SImm9", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3442,7 +3460,7 @@ def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10, (ins GPR64xsp:$Rn, simm9:$SImm9), "ldrsw\t$Rt, [$Rn, $SImm9]!", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeSingleIndexedInstruction"; @@ -3652,7 +3670,7 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, (outs SomeReg:$Rt, SomeReg:$Rt2), (ins GPR64xsp:$Rn, simm7:$SImm7), "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3666,7 +3684,8 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, GPR64xsp:$Rn, simm7:$SImm7), "stp\t$Rt, $Rt2, [$Rn], $SImm7", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { let mayStore = 1; let Constraints = "$Rn = $Rn_wb"; @@ -3679,16 +3698,17 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, (ins GPR64xsp:$Rn, simm7:$SImm7), "ldp\t$Rt, $Rt2, [$Rn], $SImm7", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; } def _PreInd_STR : A64I_LSPpreind { + (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7), + "stp\t$Rt, $Rt2, [$Rn, 
$SImm7]!", + [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { let mayStore = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; @@ -3699,15 +3719,16 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, (ins GPR64xsp:$Rn, simm7:$SImm7), "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; } def _NonTemp_STR : A64I_LSPnontemp { + (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7), + "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, + Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { let mayStore = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3719,7 +3740,7 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, (outs SomeReg:$Rt, SomeReg:$Rt2), (ins GPR64xsp:$Rn, simm7:$SImm7), "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3745,7 +3766,7 @@ def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64xsp:$Rn, word_simm7:$SImm7), "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let DecoderMethod = "DecodeLDSTPairInstruction"; } @@ -3756,7 +3777,8 @@ def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1, (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb), (ins GPR64xsp:$Rn, word_simm7:$SImm7), "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; @@ -3767,7 +3789,7 @@ def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1, (ins GPR64xsp:$Rn, word_simm7:$SImm7), "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!", [], 
NoItinerary>, - Sched<[WriteLd, ReadLd]> { + Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { let mayLoad = 1; let Constraints = "$Rn = $Rn_wb"; let DecoderMethod = "DecodeLDSTPairInstruction"; @@ -4150,7 +4172,8 @@ let isMoveImm = 1, isReMaterializable = 1, (ins movz64_imm:$FullImm)>; } -let Constraints = "$src = $Rd" in +let Constraints = "$src = $Rd", + SchedRW = [WriteALU, ReadALU] in defm MOVK : A64I_movwSizes<0b11, "movk", (ins GPR32:$src, movk32_imm:$FullImm), (ins GPR64:$src, movk64_imm:$FullImm)>; diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 3b919b388b2..0b97e3bdf5a 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -122,14 +122,16 @@ multiclass NeonI_3VSame_B_sizes size, bits<5> opcode, asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b", [(set (v8i8 VPR64:$Rd), (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def _16B : NeonI_3VSame<0b1, u, size, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b", [(set (v16i8 VPR128:$Rd), (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -143,28 +145,32 @@ multiclass NeonI_3VSame_HS_sizes opcode, asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h", [(set (v4i16 VPR64:$Rd), (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def _8H : NeonI_3VSame<0b1, u, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h", [(set (v8i16 VPR128:$Rd), (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def _2S : NeonI_3VSame<0b0, u, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s", [(set (v2i32 
VPR64:$Rd), (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def _4S : NeonI_3VSame<0b1, u, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", [(set (v4i32 VPR128:$Rd), (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } multiclass NeonI_3VSame_BHS_sizes opcode, @@ -177,14 +183,16 @@ multiclass NeonI_3VSame_BHS_sizes opcode, asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b", [(set (v8i8 VPR64:$Rd), (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def _16B : NeonI_3VSame<0b1, u, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b", [(set (v16i8 VPR128:$Rd), (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -198,7 +206,8 @@ multiclass NeonI_3VSame_BHSD_sizes opcode, asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d", [(set (v2i64 VPR128:$Rd), (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -214,21 +223,24 @@ multiclass NeonI_3VSame_SD_sizes opcode, asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s", [(set (ResTy2S VPR64:$Rd), (ResTy2S (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", [(set (ResTy4S VPR128:$Rd), (ResTy4S (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), asmop # "\t$Rd.2d, $Rn.2d, 
$Rm.2d", [(set (ResTy2D VPR128:$Rd), (ResTy2D (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -286,9 +298,11 @@ def : Pat<(v1i32 (sub FPR32:$Rn, FPR32:$Rm)), // Vector Multiply (Integer and Floating-Point) +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>; defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, v2f32, v4f32, v2f64, 1>; +} // Patterns to match mul of v1i8/v1i16/v1i32 types def : Pat<(v1i8 (mul FPR8:$Rn, FPR8:$Rm)), @@ -309,8 +323,10 @@ def : Pat<(v1i32 (mul FPR32:$Rn, FPR32:$Rm)), // Vector Multiply (Polynomial) +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul", int_arm_neon_vmulp, int_arm_neon_vmulp, 1>; +} // Vector Multiply-accumulate and Multiply-subtract (Integer) @@ -324,7 +340,8 @@ class NeonI_3VSame_Constraint_impl { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -335,6 +352,7 @@ def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), (sub node:$Ra, (mul node:$Rn, node:$Rm))>; +let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC] in { def MLAvvv_8B: NeonI_3VSame_Constraint_impl<"mla", ".8b", VPR64, v8i8, 0b0, 0b0, 0b00, 0b10010, Neon_mla>; def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8, @@ -360,6 +378,7 @@ def MLSvvv_2S: NeonI_3VSame_Constraint_impl<"mls", ".2s", VPR64, v2i32, 0b0, 0b1, 0b10, 0b10010, Neon_mls>; def MLSvvv_4S: NeonI_3VSame_Constraint_impl<"mls", ".4s", VPR128, v4i32, 0b1, 0b1, 0b10, 0b10010, Neon_mls>; +} // Vector Multiply-accumulate and Multiply-subtract (Floating Point) @@ -369,7 +388,8 @@ def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), (fsub node:$Ra, (fmul_su node:$Rn, node:$Rm))>; -let Predicates = [HasNEON, UseFusedMAC] in 
{ +let Predicates = [HasNEON, UseFusedMAC], + SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC] in { def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32, 0b0, 0b0, 0b00, 0b11001, Neon_fmla>; def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32, @@ -403,8 +423,10 @@ def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)), // Vector Divide (Floating-Point) +let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in { defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, v2f32, v4f32, v2f64, 0>; +} // Vector Bitwise Operations @@ -770,49 +792,56 @@ multiclass NeonI_cmpz_sizes opcode, string asmop, CondCode CC> asmop # "\t$Rd.8b, $Rn.8b, $Imm", [(set (v8i8 VPR64:$Rd), (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), asmop # "\t$Rd.16b, $Rn.16b, $Imm", [(set (v16i8 VPR128:$Rd), (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), asmop # "\t$Rd.4h, $Rn.4h, $Imm", [(set (v4i16 VPR64:$Rd), (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), asmop # "\t$Rd.8h, $Rn.8h, $Imm", [(set (v8i16 VPR128:$Rd), (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), asmop # "\t$Rd.2s, $Rn.2s, $Imm", [(set (v2i32 VPR64:$Rd), (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; 
def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), asmop # "\t$Rd.4s, $Rn.4s, $Imm", [(set (v4i32 VPR128:$Rd), (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), asmop # "\t$Rd.2d, $Rn.2d, $Imm", [(set (v2i64 VPR128:$Rd), (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } // Vector Compare Mask Equal to Zero (Integer) @@ -879,21 +908,24 @@ multiclass NeonI_fpcmpz_sizes opcode, asmop # "\t$Rd.2s, $Rn.2s, $FPImm", [(set (v2i32 VPR64:$Rd), (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpzz32:$FPImm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm), asmop # "\t$Rd.4s, $Rn.4s, $FPImm", [(set (v4i32 VPR128:$Rd), (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm), asmop # "\t$Rd.2d, $Rn.2d, $FPImm", [(set (v2i64 VPR128:$Rd), (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } // Vector Compare Mask Equal to Zero (Floating Point) @@ -1051,6 +1083,7 @@ defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp", int_arm_neon_vpadd, v2f32, v4f32, v2f64, 1>; +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { // Vector Saturating Doubling Multiply High defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh", int_arm_neon_vqdmulh, 1>; @@ -1063,6 +1096,7 @@ defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh", defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, 
"fmulx", int_aarch64_neon_vmulx, v2f32, v4f32, v2f64, 1>; +} // Patterns to match llvm.aarch64.* intrinsic for // ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output @@ -1202,7 +1236,8 @@ multiclass NeonI_mov_imm_lsl_sizes { + NoItinerary>, + Sched<[WriteFPALU]> { bits<2> Simm; let cmode = {0b0, Simm{1}, Simm{0}, 0b0}; } @@ -1215,7 +1250,8 @@ multiclass NeonI_mov_imm_lsl_sizes { + NoItinerary>, + Sched<[WriteFPALU]> { bits<2> Simm; let cmode = {0b0, Simm{1}, Simm{0}, 0b0}; } @@ -1229,7 +1265,8 @@ multiclass NeonI_mov_imm_lsl_sizes { + NoItinerary>, + Sched<[WriteFPALU]> { bit Simm; let cmode = {0b1, 0b0, Simm, 0b0}; } @@ -1242,7 +1279,8 @@ multiclass NeonI_mov_imm_lsl_sizes { + NoItinerary>, + Sched<[WriteFPALU]> { bit Simm; let cmode = {0b1, 0b0, Simm, 0b0}; } @@ -1263,7 +1301,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bits<2> Simm; let cmode = {0b0, Simm{1}, Simm{0}, 0b1}; } @@ -1277,7 +1316,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bits<2> Simm; let cmode = {0b0, Simm{1}, Simm{0}, 0b1}; } @@ -1292,7 +1332,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bit Simm; let cmode = {0b1, 0b0, Simm, 0b1}; } @@ -1306,7 +1347,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bit Simm; let cmode = {0b1, 0b0, Simm, 0b1}; } @@ -1325,7 +1367,8 @@ multiclass NeonI_mov_imm_msl_sizes { + NoItinerary>, + Sched<[WriteFPALU]> { bit Simm; let cmode = {0b1, 0b1, 0b0, Simm}; } @@ -1338,7 +1381,8 @@ multiclass NeonI_mov_imm_msl_sizes { + NoItinerary>, + Sched<[WriteFPALU]> { bit Simm; let cmode = {0b1, 0b1, 0b0, Simm}; } @@ -1565,7 +1609,8 @@ def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0, "movi\t$Rd.8b, $Imm", [(set (v8i8 VPR64:$Rd), (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU]> { let 
cmode = 0b1110; } @@ -1574,7 +1619,8 @@ def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0, "movi\t$Rd.16b, $Imm", [(set (v16i8 VPR128:$Rd), (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU]> { let cmode = 0b1110; } } @@ -1586,7 +1632,8 @@ def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1, "movi\t $Rd.2d, $Imm", [(set (v2i64 VPR128:$Rd), (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU]> { let cmode = 0b1110; } } @@ -1599,7 +1646,8 @@ def MOVIdi : NeonI_1VModImm<0b0, 0b1, "movi\t $Rd, $Imm", [(set (v1i64 FPR64:$Rd), (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU]> { let cmode = 0b1110; } } @@ -1613,7 +1661,8 @@ class NeonI_FMOV_impl { + NoItinerary>, + Sched<[WriteFPALU]> { let cmode = 0b1111; } @@ -1692,7 +1741,8 @@ class N2VShift opcode, string asmop, string T, [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn), (Ty (Neon_vdup (i32 ImmTy:$Imm))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; multiclass NeonI_N2VShL opcode, string asmop> { // 64-bit vector types. @@ -1873,7 +1923,8 @@ class N2VShiftLong opcode, string asmop, string DestT, (DestTy (shl (DestTy (ExtOp (SrcTy VPR64:$Rn))), (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; class N2VShiftLongHigh opcode, string asmop, string DestT, string SrcT, ValueType DestTy, ValueType SrcTy, @@ -1887,7 +1938,8 @@ class N2VShiftLongHigh opcode, string asmop, string DestT, (DestTy (ExtOp (SrcTy (getTop VPR128:$Rn)))), (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; multiclass NeonI_N2VShLL opcode, string asmop, SDNode ExtOp> { @@ -1988,7 +2040,8 @@ class N2VShift_RQ opcode, string asmop, string T, asmop # "\t$Rd." # T # ", $Rn." 
# T # ", $Imm", [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; // shift right (vector by immediate) multiclass NeonI_N2VShR_RQ opcode, string asmop, @@ -2091,7 +2144,8 @@ class N2VShiftAdd opcode, string asmop, string T, [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src), (Ty (OpNode (Ty VPRC:$Rn), (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -2146,7 +2200,8 @@ class N2VShiftAdd_R opcode, string asmop, string T, asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src), (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -2201,7 +2256,8 @@ class N2VShiftIns opcode, string asmop, string T, asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -2295,14 +2351,16 @@ class N2VShR_Narrow opcode, string asmop, string DestT, : NeonI_2VShiftImm; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; class N2VShR_Narrow_Hi opcode, string asmop, string DestT, string SrcT, Operand ImmTy> : NeonI_2VShiftImm { + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -2461,7 +2519,8 @@ class N2VCvt_Fx opcode, string asmop, string T, asmop # "\t$Rd." # T # ", $Rn." 
# T # ", $Imm", [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn), (i32 ImmTy:$Imm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; multiclass NeonI_N2VCvt_Fx2fp opcode, string asmop, SDPatternOperator IntOp> { @@ -2539,28 +2598,32 @@ multiclass NeonI_2VAcross_1 opcode, asmop # "\t$Rd, $Rn.8b", [(set (v1i16 FPR16:$Rd), (v1i16 (opnode (v8i8 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode, (outs FPR16:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd, $Rn.16b", [(set (v1i16 FPR16:$Rd), (v1i16 (opnode (v16i8 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode, (outs FPR32:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd, $Rn.4h", [(set (v1i32 FPR32:$Rd), (v1i32 (opnode (v4i16 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode, (outs FPR32:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd, $Rn.8h", [(set (v1i32 FPR32:$Rd), (v1i32 (opnode (v8i16 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; // _1d2s doesn't exist! 
@@ -2569,7 +2632,8 @@ multiclass NeonI_2VAcross_1 opcode, asmop # "\t$Rd, $Rn.4s", [(set (v1i64 FPR64:$Rd), (v1i64 (opnode (v4i32 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>; @@ -2585,28 +2649,32 @@ multiclass NeonI_2VAcross_2 opcode, asmop # "\t$Rd, $Rn.8b", [(set (v1i8 FPR8:$Rd), (v1i8 (opnode (v8i8 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode, (outs FPR8:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd, $Rn.16b", [(set (v1i8 FPR8:$Rd), (v1i8 (opnode (v16i8 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode, (outs FPR16:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd, $Rn.4h", [(set (v1i16 FPR16:$Rd), (v1i16 (opnode (v4i16 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode, (outs FPR16:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd, $Rn.8h", [(set (v1i16 FPR16:$Rd), (v1i16 (opnode (v8i16 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; // _1s2s doesn't exist! @@ -2615,7 +2683,8 @@ multiclass NeonI_2VAcross_2 opcode, asmop # "\t$Rd, $Rn.4s", [(set (v1i32 FPR32:$Rd), (v1i32 (opnode (v4i32 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>; @@ -2635,7 +2704,8 @@ multiclass NeonI_2VAcross_3 opcode, bits<2> size, asmop # "\t$Rd, $Rn.4s", [(set (f32 FPR32:$Rd), (f32 (opnode (v4f32 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv", @@ -2658,7 +2728,8 @@ class NeonI_Permute size, bits<3> opcode, asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." 
# OpS, [(set (Ty OpVPR:$Rd), (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; multiclass NeonI_Perm_pat opcode, string asmop, SDPatternOperator opnode> { @@ -2717,7 +2788,8 @@ class NeonI_3VDL size, bits<4> opcode, [(set (ResTy VPR128:$Rd), (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))), (ResTy (ext (OpTy OpVPR:$Rm))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; multiclass NeonI_3VDL_s opcode, string asmop, SDPatternOperator opnode, @@ -2792,7 +2864,8 @@ class NeonI_3VDW size, bits<4> opcode, [(set (ResTy VPR128:$Rd), (ResTy (opnode (ResTy VPR128:$Rn), (ResTy (ext (OpTy OpVPR:$Rm))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; multiclass NeonI_3VDW_s opcode, string asmop, SDPatternOperator opnode> { @@ -2873,7 +2946,8 @@ class NeonI_3VDN_addhn_2Op size, bits<4> opcode, (ResTy (get_hi (OpTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; multiclass NeonI_3VDN_addhn_2Op opcode, string asmop, SDPatternOperator opnode, bit Commutable = 0> { @@ -2901,7 +2975,8 @@ class NeonI_3VD_2Op size, bits<4> opcode, asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." 
# OpS, [(set (ResTy ResVPR:$Rd), (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; // normal narrow pattern multiclass NeonI_3VDN_2Op opcode, string asmop, @@ -2925,7 +3000,8 @@ class NeonI_3VDN_3Op size, bits<4> opcode, : NeonI_3VDiff { + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; let neverHasSideEffects = 1; } @@ -2990,7 +3066,8 @@ class NeonI_3VDL_Ext size, bits<4> opcode, [(set (ResTy VPR128:$Rd), (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; multiclass NeonI_3VDL_zext opcode, string asmop, SDPatternOperator opnode, bit Commutable = 0> { @@ -3058,7 +3135,8 @@ class NeonI_3VDL_Aba size, bits<4> opcode, (ResTy VPR128:$src), (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -3098,7 +3176,8 @@ defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add, // Long pattern with 2 operands multiclass NeonI_3VDL_2Op opcode, string asmop, SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { + let isCommutable = Commutable, + SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", opnode, VPR128, VPR64, v8i16, v8i8>; def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", @@ -3120,7 +3199,8 @@ class NeonI_3VDL2_2Op_mull size, bits<4> opcode, asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." 
# OpS, [(set (ResTy VPR128:$Rd), (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>; multiclass NeonI_3VDL2_2Op_mull_v1 opcode, string asmop, string opnode, bit Commutable = 0> { @@ -3154,7 +3234,8 @@ class NeonI_3VDL_3Op size, bits<4> opcode, (ResTy (opnode (ResTy VPR128:$src), (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { let Constraints = "$src = $Rd"; } @@ -3202,7 +3283,8 @@ class NeonI_3VDL2_3Op_mlas size, bits<4> opcode, (ResTy (subop (ResTy VPR128:$src), (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { let Constraints = "$src = $Rd"; } @@ -3254,8 +3336,10 @@ multiclass NeonI_3VDL_v2 opcode, string asmop, } } +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull", int_arm_neon_vqdmull, 1>; +} multiclass NeonI_3VDL2_2Op_mull_v2 opcode, string asmop, string opnode, bit Commutable = 0> { @@ -3299,6 +3383,7 @@ multiclass NeonI_3VDL_v3 opcode, string asmop, } } +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, int_aarch64_neon_vmull_p64, 1>; @@ -3319,7 +3404,8 @@ multiclass NeonI_3VDL2_2Op_mull_v3 opcode, string asmop, (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))), (v1i64 (scalar_to_vector (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>; } def : Pat<(v16i8 (int_aarch64_neon_vmull_p64 @@ -3355,7 +3441,8 @@ class NeonI_LDVList opcode, bits<2> size, (outs VecList:$Rt), (ins GPR64xsp:$Rn), asmop # "\t$Rt, [$Rn]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecLd, ReadVecLd]> { let mayLoad = 1; let neverHasSideEffects = 1; } @@ -3409,7 +3496,8 @@ class NeonI_STVList opcode, bits<2> size, (outs), 
(ins GPR64xsp:$Rn, VecList:$Rt), asmop # "\t$Rt, [$Rn]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { let mayStore = 1; let neverHasSideEffects = 1; } @@ -3642,7 +3730,8 @@ multiclass NeonI_LDWB_VList opcode, bits<2> size, (ins GPR64xsp:$Rn, ImmTy:$amt), asmop # "\t$Rt, [$Rn], $amt", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> { let Rm = 0b11111; } @@ -3651,7 +3740,8 @@ multiclass NeonI_LDWB_VList opcode, bits<2> size, (ins GPR64xsp:$Rn, GPR64noxzr:$Rm), asmop # "\t$Rt, [$Rn], $Rm", [], - NoItinerary>; + NoItinerary>, + Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>; } } @@ -3725,7 +3815,8 @@ multiclass NeonI_STWB_VList opcode, bits<2> size, (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt), asmop # "\t$Rt, [$Rn], $amt", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { let Rm = 0b11111; } @@ -3734,7 +3825,8 @@ multiclass NeonI_STWB_VList opcode, bits<2> size, (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt), asmop # "\t$Rt, [$Rn], $Rm", [], - NoItinerary>; + NoItinerary>, + Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>; } } @@ -3838,7 +3930,8 @@ class NeonI_LDN_Dup opcode, bits<2> size, (outs VecList:$Rt), (ins GPR64xsp:$Rn), asmop # "\t$Rt, [$Rn]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecLd, ReadVecLd]> { let mayLoad = 1; let neverHasSideEffects = 1; } @@ -3932,7 +4025,8 @@ class NeonI_LDN_Lane op2_1, bit op0, RegisterOperand VList, (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane), asmop # "\t$Rt[$lane], [$Rn]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecLd, ReadVecLd, ReadVecLd]> { let mayLoad = 1; let neverHasSideEffects = 1; let hasExtraDefRegAllocReq = 1; @@ -4017,7 +4111,8 @@ class NeonI_STN_Lane op2_1, bit op0, RegisterOperand VList, (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane), asmop # "\t$Rt[$lane], [$Rn]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { let mayStore = 1; let 
neverHasSideEffects = 1; let hasExtraDefRegAllocReq = 1; @@ -4109,16 +4204,18 @@ multiclass NeonI_LDN_WB_Dup opcode, bits<2> size, (ins GPR64xsp:$Rn, ImmTy:$amt), asmop # "\t$Rt, [$Rn], $amt", [], - NoItinerary> { - let Rm = 0b11111; - } + NoItinerary>, + Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> { + let Rm = 0b11111; + } def _register : NeonI_LdOne_Dup_Post; + NoItinerary>, + Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>; } } @@ -4182,7 +4279,8 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1, VList:$src, ImmOp:$lane), asmop # "\t$Rt[$lane], [$Rn], $amt", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]> { let Rm = 0b11111; } @@ -4194,7 +4292,8 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1, VList:$src, ImmOp:$lane), asmop # "\t$Rt[$lane], [$Rn], $Rm", [], - NoItinerary>; + NoItinerary>, + Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd, ReadVecLd]>; } multiclass LD_Lane_WB_BHSD { + NoItinerary>, + Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { let Rm = 0b11111; } @@ -4294,7 +4394,8 @@ let mayStore = 1, neverHasSideEffects = 1, ImmOp:$lane), asmop # "\t$Rt[$lane], [$Rn], $Rm", [], - NoItinerary>; + NoItinerary>, + Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>; } multiclass ST_Lane_WB_BHSD size, bits<5> opcode, string asmop, (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm), !strconcat(asmop, "\t$Rd, $Rn, $Rm"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; class NeonI_Scalar3Same_D_size opcode, string asmop> : NeonI_Scalar3Same_size; @@ -4465,7 +4567,8 @@ class NeonI_Scalar3Diff_size size, bits<4> opcode, string asmop, (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm), !strconcat(asmop, "\t$Rd, $Rn, $Rm"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; multiclass NeonI_Scalar3Diff_HS_size opcode, string asmop> { def shh : NeonI_Scalar3Diff_size; @@ -4478,12 +4581,14 @@ multiclass 
NeonI_Scalar3Diff_ml_HS_size opcode, string asmop> { (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm), !strconcat(asmop, "\t$Rd, $Rn, $Rm"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]>; def dss : NeonI_Scalar3Diff; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]>; } } @@ -4513,7 +4618,8 @@ class NeonI_Scalar2SameMisc_size size, bits<5> opcode, string asm (outs FPRCD:$Rd), (ins FPRCS:$Rn), !strconcat(asmop, "\t$Rd, $Rn"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; multiclass NeonI_Scalar2SameMisc_SD_size opcode, string asmop> { @@ -4550,7 +4656,8 @@ class NeonI_Scalar2SameMisc_accum_size size, bits<5> opcode, (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn), !strconcat(asmop, "\t$Rd, $Rn"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; multiclass NeonI_Scalar2SameMisc_accum_BHSD_size opcode, string asmop> { @@ -4610,7 +4717,8 @@ class NeonI_Scalar2SameMisc_cmpz_D_size opcode, string asmop> (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm), !strconcat(asmop, "\t$Rd, $Rn, $Imm"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; multiclass NeonI_Scalar2SameMisc_cmpz_SD_size opcode, string asmop> { @@ -4618,12 +4726,14 @@ multiclass NeonI_Scalar2SameMisc_cmpz_SD_size opcode, (outs FPR32:$Rd), (ins FPR32:$Rn, fpzz32:$FPImm), !strconcat(asmop, "\t$Rd, $Rn, $FPImm"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def ddi : NeonI_Scalar2SameMisc; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } class Neon_Scalar2SameMisc_cmpz_D_size_patterns opcode, string asmop, : NeonI_ScalarShiftImm; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; multiclass NeonI_ScalarShiftRightImm_D_size opcode, string asmop> { @@ -4772,7 +4883,8 @@ class NeonI_ScalarShiftRightImm_accum_D_size opcode, string asmop (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm), !strconcat(asmop, "\t$Rd, $Rn, $Imm"), - 
[], NoItinerary> { + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { bits<6> Imm; let Inst{22} = 0b1; // immh:immb = 1xxxxxx let Inst{21-16} = Imm; @@ -4784,7 +4896,8 @@ class NeonI_ScalarShiftLeftImm_accum_D_size opcode, string asmop> (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm), !strconcat(asmop, "\t$Rd, $Rn, $Imm"), - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { bits<6> Imm; let Inst{22} = 0b1; // immh:immb = 1xxxxxx let Inst{21-16} = Imm; @@ -4797,7 +4910,8 @@ class NeonI_ScalarShiftImm_narrow_size opcode, string asmop, : NeonI_ScalarShiftImm; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; multiclass NeonI_ScalarShiftImm_narrow_HSD_size opcode, string asmop> { @@ -5111,10 +5225,13 @@ defm : Neon_Scalar3Same_BHSD_size_patterns; // Scalar Integer Saturating Doubling Multiply Half High +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>; // Scalar Integer Saturating Rounding Doubling Multiply Half High +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>; +} // Patterns to match llvm.arm.* intrinsic for // Scalar Integer Saturating Doubling Multiply Half High and @@ -5124,8 +5241,10 @@ defm : Neon_Scalar3Same_HS_size_patterns; +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in { // Scalar Floating-point Multiply Extended defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>; +} // Scalar Floating-point Reciprocal Step defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>; @@ -5218,18 +5337,24 @@ defm : Neon_Scalar3Same_BHSD_size_patterns; defm : Neon_Scalar3Same_D_size_patterns; +let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in { // Signed Saturating Doubling Multiply-Add Long defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">; +} defm : 
Neon_Scalar3Diff_ml_HS_size_patterns; // Signed Saturating Doubling Multiply-Subtract Long +let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in { defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">; +} defm : Neon_Scalar3Diff_ml_HS_size_patterns; // Signed Saturating Doubling Multiply Long +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in { defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">; +} defm : Neon_Scalar3Diff_HS_size_patterns; @@ -5557,7 +5682,8 @@ multiclass NeonI_ScalarPair_D_sizes opcode, (outs FPR64:$Rd), (ins VPR128:$Rn), !strconcat(asmop, "\t$Rd, $Rn.2d"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } } @@ -5569,7 +5695,8 @@ multiclass NeonI_ScalarPair_SD_sizes opcode, (outs FPR32:$Rd), (ins VPR64:$Rn), !strconcat(asmop, "\t$Rd, $Rn.2s"), [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } } @@ -5642,7 +5769,8 @@ class NeonI_ScalarXIndexedElemArith opcode, (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm), asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPMul, ReadFPMul, ReadFPMul]> { bits<3> Imm; bits<5> MRm; } @@ -5659,7 +5787,8 @@ class NeonI_ScalarXIndexedElemArith_Constraint_Impl opcode (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm), asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { let Constraints = "$src = $Rd"; bits<3> Imm; bits<5> MRm; @@ -6170,7 +6299,8 @@ class NeonI_Scalar_DUP { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bits<4> Imm; } @@ -6581,7 +6711,8 @@ class NeonI_Extract op2, string asmop, asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS # ", $Index", [], - NoItinerary>{ + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>{ bits<4> Index; } @@ -6622,7 +6753,8 @@ class NI_TBL op2, bits<2> len, bit op, (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm), asmop # "\t$Rd." 
# OpS # ", $Rn, $Rm." # OpS, [], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; // The vectors in look up table are always 16b multiclass NI_TBL_pat len, bit op, string asmop, string List> { @@ -6646,7 +6778,8 @@ class NI_TBX op2, bits<2> len, bit op, (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm), asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS, [], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -6674,7 +6807,8 @@ class NeonI_INS_main { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { bits<4> Imm; let Constraints = "$src = $Rd"; } @@ -6732,7 +6866,8 @@ class NeonI_INS_element ResImm:$Immd, ResImm:$Immn), asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; bits<4> Immd; bits<4> Immn; @@ -6876,7 +7011,8 @@ class NeonI_SMOV { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bits<4> Imm; } @@ -6970,7 +7106,8 @@ class NeonI_UMOV { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bits<4> Imm; } @@ -7128,7 +7265,8 @@ class NeonI_DUP_Elt { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { bits<4> Imm; } @@ -7234,7 +7372,8 @@ class NeonI_DUP; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> { let Inst{20-16} = 0b00001; @@ -7335,7 +7474,8 @@ class NI_2VE size, bits<4> opcode, asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Re." # EleOpS # "[$Index]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { bits<3> Index; bits<5> Re; @@ -7434,7 +7574,8 @@ class NI_2VE_2op size, bits<4> opcode, asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Re." 
# EleOpS # "[$Index]", [], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { bits<3> Index; bits<5> Re; } @@ -7473,9 +7614,11 @@ multiclass NI_2VE_v1_2op opcode, string asmop> { } } +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">; defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">; defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">; +} // Pattern for lane in 128-bit vector class NI_2VE_mul_laneq opcode, string asmop> { } } +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">; defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">; +} class NI_2VE_mul_lane_2d opcode, string asmop> { } } +let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">; defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">; defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">; +} def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))), (FMOVdd $src)>; @@ -8074,7 +8221,8 @@ class NeonI_REV size, bit Q, bit U, asmop # "\t$Rd." # Res # ", $Rn." 
# Res, [(set (ResTy ResVPR:$Rd), (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))], - NoItinerary> ; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128, v16i8, Neon_rev64>; @@ -8113,42 +8261,48 @@ multiclass NeonI_PairwiseAdd opcode, asmop # "\t$Rd.8h, $Rn.16b", [(set (v8i16 VPR128:$Rd), (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.4h, $Rn.8b", [(set (v4i16 VPR64:$Rd), (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.4s, $Rn.8h", [(set (v4i32 VPR128:$Rd), (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.4h", [(set (v2i32 VPR64:$Rd), (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2d, $Rn.4s", [(set (v2i64 VPR128:$Rd), (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.1d, $Rn.2s", [(set (v1i64 VPR64:$Rd), (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010, @@ -8170,7 +8324,8 @@ multiclass NeonI_PairwiseAddAcc opcode, [(set (v8i16 VPR128:$Rd), (v8i16 (Neon_Padd (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 8b4h : NeonI_2VMisc<0b0, U, 0b00, 
opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), @@ -8178,7 +8333,8 @@ multiclass NeonI_PairwiseAddAcc opcode, [(set (v4i16 VPR64:$Rd), (v4i16 (Neon_Padd (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), @@ -8186,7 +8342,8 @@ multiclass NeonI_PairwiseAddAcc opcode, [(set (v4i32 VPR128:$Rd), (v4i32 (Neon_Padd (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), @@ -8194,7 +8351,8 @@ multiclass NeonI_PairwiseAddAcc opcode, [(set (v2i32 VPR64:$Rd), (v2i32 (Neon_Padd (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), @@ -8202,7 +8360,8 @@ multiclass NeonI_PairwiseAddAcc opcode, [(set (v2i64 VPR128:$Rd), (v2i64 (Neon_Padd (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), @@ -8210,7 +8369,8 @@ multiclass NeonI_PairwiseAddAcc opcode, [(set (v1i64 VPR64:$Rd), (v1i64 (Neon_Padd (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -8223,37 +8383,44 @@ multiclass NeonI_2VMisc_BHSDsize_1Arg opcode> { def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.16b, $Rn.16b", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.8h, $Rn.8h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, 
ReadFPALU]>; def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.4s, $Rn.4s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2d, $Rn.2d", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.8b, $Rn.8b", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.4h, $Rn.4h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>; @@ -8323,37 +8490,44 @@ multiclass NeonI_2VMisc_BHSDsize_2Args opcode> { def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "\t$Rd.16b, $Rn.16b", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "\t$Rd.8h, $Rn.8h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "\t$Rd.4s, $Rn.4s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "\t$Rd.2d, $Rn.2d", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), asmop # 
"\t$Rd.8b, $Rn.8b", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), asmop # "\t$Rd.4h, $Rn.4h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -8401,42 +8575,48 @@ multiclass NeonI_2VMisc_BHSsizes; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.8h, $Rn.8h", [(set (v8i16 VPR128:$Rd), (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.4s, $Rn.4s", [(set (v4i32 VPR128:$Rd), (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.8b, $Rn.8b", [(set (v8i8 VPR64:$Rd), (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.4h, $Rn.4h", [(set (v4i16 VPR64:$Rd), (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", [(set (v2i32 VPR64:$Rd), (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>; @@ -8447,12 +8627,14 @@ multiclass NeonI_2VMisc_Bsize size, def 16b : NeonI_2VMisc<0b1, U, size, Opcode, (outs 
VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.16b, $Rn.16b", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8b : NeonI_2VMisc<0b0, U, size, Opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.8b, $Rn.8b", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>; @@ -8510,21 +8692,24 @@ multiclass NeonI_2VMisc_SDsizes opcode, asmop # "\t$Rd.4s, $Rn.4s", [(set (v4f32 VPR128:$Rd), (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2d, $Rn.2d", [(set (v2f64 VPR128:$Rd), (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", [(set (v2f32 VPR64:$Rd), (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>; @@ -8534,33 +8719,39 @@ multiclass NeonI_2VMisc_HSD_Narrow opcode> { def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode, (outs VPR64:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.8b, $Rn.8h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.4h, $Rn.4s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2s, $Rn.2d", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; let Constraints = "$Rd = $src" in { def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.16b, $Rn.8h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, 
ReadFPALU]>; def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.8h, $Rn.4s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.4s, $Rn.2d", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -8613,37 +8804,43 @@ multiclass NeonI_2VMisc_SHIFT opcode> { (outs VPR128:$Rd), (ins VPR64:$Rn, uimm_exact8:$Imm), asmop # "\t$Rd.8h, $Rn.8b, $Imm", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR64:$Rn, uimm_exact16:$Imm), asmop # "\t$Rd.4s, $Rn.4h, $Imm", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR64:$Rn, uimm_exact32:$Imm), asmop # "\t$Rd.2d, $Rn.2s, $Imm", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, uimm_exact8:$Imm), asmop # "2\t$Rd.8h, $Rn.16b, $Imm", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, uimm_exact16:$Imm), asmop # "2\t$Rd.4s, $Rn.8h, $Imm", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, uimm_exact32:$Imm), asmop # "2\t$Rd.2d, $Rn.4s, $Imm", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } } @@ -8691,23 +8888,27 @@ multiclass NeonI_2VMisc_SD_Narrow opcode> { def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode, (outs VPR64:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.4h, $Rn.4s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2d2s : NeonI_2VMisc<0b0, 
U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2s, $Rn.2d", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; let Constraints = "$src = $Rd" in { def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.8h, $Rn.4s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.4s, $Rn.2d", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; } } @@ -8745,12 +8946,14 @@ multiclass NeonI_2VMisc_D_Narrow; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.4s, $Rn.2d", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; } @@ -8774,22 +8977,26 @@ multiclass NeonI_2VMisc_HS_Extend opcode> { def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.4s, $Rn.4h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2d, $Rn.2s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "2\t$Rd.4s, $Rn.8h", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "2\t$Rd.2d, $Rn.4s", - [], NoItinerary>; + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>; @@ -8825,21 +9032,24 @@ multiclass NeonI_2VMisc_SD_Conv opcode, asmop # "\t$Rd.4s, $Rn.4s", [(set (ResTy4s VPR128:$Rd), (ResTy4s (Neon_Op (OpTy4s 
VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2d, $Rn.2d", [(set (ResTy2d VPR128:$Rd), (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", [(set (ResTy2s VPR64:$Rd), (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } multiclass NeonI_2VMisc_fp_to_int; defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101, int_arm_neon_vrsqrte>; +let SchedRW = [WriteFPSqrt, ReadFPSqrt] in { defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>; +} multiclass NeonI_2VMisc_S_Conv opcode, SDPatternOperator Neon_Op> { @@ -8903,14 +9115,16 @@ multiclass NeonI_2VMisc_S_Conv; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", [(set (v2i32 VPR64:$Rd), (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; } defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100, @@ -8927,7 +9141,8 @@ class NeonI_Cryptoaes_2v size, bits<5> opcode, [(set (v16i8 VPR128:$Rd), (v16i8 (opnode (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))))], - NoItinerary>{ + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; let Predicates = [HasNEON, HasCrypto]; } @@ -8942,7 +9157,8 @@ class NeonI_Cryptoaes size, bits<5> opcode, asmop # "\t$Rd.16b, $Rn.16b", [(set (v16i8 VPR128:$Rd), (v16i8 (opnode (v16i8 VPR128:$Rn))))], - NoItinerary>; + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]>; def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>; def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>; @@ 
-8955,7 +9171,8 @@ class NeonI_Cryptosha_vv size, bits<5> opcode, [(set (v4i32 VPR128:$Rd), (v4i32 (opnode (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; let Predicates = [HasNEON, HasCrypto]; } @@ -8970,7 +9187,8 @@ class NeonI_Cryptosha_ss size, bits<5> opcode, : NeonI_Crypto_SHA { + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU]> { let Predicates = [HasNEON, HasCrypto]; let hasSideEffects = 0; } @@ -8990,7 +9208,8 @@ class NeonI_Cryptosha3_vvv size, bits<3> opcode, string asmop, (v4i32 (opnode (v4i32 VPR128:$src), (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; let Predicates = [HasNEON, HasCrypto]; } @@ -9010,7 +9229,8 @@ class NeonI_Cryptosha3_qqv size, bits<3> opcode, string asmop, (v4i32 (opnode (v4i32 FPR128:$src), (v4i32 FPR128:$Rn), (v4i32 VPR128:$Rm))))], - NoItinerary> { + NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; let Predicates = [HasNEON, HasCrypto]; } @@ -9025,7 +9245,8 @@ class NeonI_Cryptosha3_qsv size, bits<3> opcode, string asmop> (outs FPR128:$Rd), (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm), asmop # "\t$Rd, $Rn, $Rm.4s", - [], NoItinerary> { + [], NoItinerary>, + Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { let Constraints = "$src = $Rd"; let hasSideEffects = 0; let Predicates = [HasNEON, HasCrypto]; diff --git a/lib/Target/AArch64/AArch64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td index 6fcb1116b6c..ec8450b9c1f 100644 --- a/lib/Target/AArch64/AArch64Schedule.td +++ b/lib/Target/AArch64/AArch64Schedule.td @@ -37,8 +37,16 @@ def ReadDiv : SchedRead; // Loads def WriteLd : SchedWrite; def WritePreLd : SchedWrite; +def WriteVecLd : SchedWrite; def ReadLd : SchedRead; def ReadPreLd : SchedRead; +def ReadVecLd : SchedRead; + +// Stores +def WriteSt 
: SchedWrite; +def WriteVecSt : SchedWrite; +def ReadSt : SchedRead; +def ReadVecSt : SchedRead; // Branches def WriteBr : SchedWrite; diff --git a/lib/Target/AArch64/AArch64ScheduleA53.td b/lib/Target/AArch64/AArch64ScheduleA53.td index e288a24eb2c..20a14e79228 100644 --- a/lib/Target/AArch64/AArch64ScheduleA53.td +++ b/lib/Target/AArch64/AArch64ScheduleA53.td @@ -71,9 +71,18 @@ def : WriteRes { let Latency = 4; } // Div def : WriteRes { let Latency = 4; } -// Load +// Load - Note: Vector loads take 1-5 cycles to issue. For the WriteVecLd below, +// choosing the median of 3 which makes the latency 6. May model this more +// carefully in the future. def : WriteRes { let Latency = 4; } def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 6; } + +// Store - Note: Vector stores take 1-3 cycles to issue. For the WriteVecSt below, +// choosing the median of 2 which makes the latency 5. May model this more +// carefully in the future. +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 5; } // Branch def : WriteRes; @@ -114,9 +123,14 @@ def : ReadAdvance; // No forwarding defined for ReadDiv yet. def : ReadAdvance; -// No forwarding defined for ReadLd, ReadPreLd yet. +// No forwarding defined for ReadLd, ReadPreLd, ReadVecLd yet. def : ReadAdvance; def : ReadAdvance; +def : ReadAdvance; + +// No forwarding defined for ReadSt and ReadVecSt yet. +def : ReadAdvance; +def : ReadAdvance; // No forwarding defined for ReadFPALU yet. def : ReadAdvance; diff --git a/test/CodeGen/AArch64/misched-basic-A53.ll b/test/CodeGen/AArch64/misched-basic-A53.ll index 0d5534eca54..1555c4868e1 100644 --- a/test/CodeGen/AArch64/misched-basic-A53.ll +++ b/test/CodeGen/AArch64/misched-basic-A53.ll @@ -4,13 +4,15 @@ ; The Cortex-A53 machine model will cause the MADD instruction to be scheduled ; much higher than the ADD instructions in order to hide latency. When not ; specifying a subtarget, the MADD will remain near the end of the block. 
+; +; CHECK: ********** MI Scheduling ********** ; CHECK: main ; CHECK: *** Final schedule for BB#2 *** ; CHECK: SU(13) ; CHECK: MADDwwww ; CHECK: SU(4) ; CHECK: ADDwwi_lsl0_s -; CHECK: ********** MI Scheduling ********** +; CHECK: ********** INTERVALS ********** @main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4 @main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4 @@ -76,6 +78,33 @@ for.end: ; preds = %for.cond ret i32 %add6 } + +; The Cortex-A53 machine model will cause the FDIVvvv_4S to be raised to +; hide latency. Whereas normally there would only be a single FADDvvv_4S +; after it, this test checks to make sure there is more than one. +; +; CHECK: ********** MI Scheduling ********** +; CHECK: neon4xfloat:BB#0 +; CHECK: *** Final schedule for BB#0 *** +; CHECK: FDIVvvv_4S +; CHECK: FADDvvv_4S +; CHECK: FADDvvv_4S +; CHECK: ********** INTERVALS ********** +define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) { + %tmp1 = fadd <4 x float> %A, %B; + %tmp2 = fadd <4 x float> %A, %tmp1; + %tmp3 = fadd <4 x float> %A, %tmp2; + %tmp4 = fadd <4 x float> %A, %tmp3; + %tmp5 = fadd <4 x float> %A, %tmp4; + %tmp6 = fadd <4 x float> %A, %tmp5; + %tmp7 = fadd <4 x float> %A, %tmp6; + %tmp8 = fadd <4 x float> %A, %tmp7; + %tmp9 = fdiv <4 x float> %A, %B; + %tmp10 = fadd <4 x float> %tmp8, %tmp9; + + ret <4 x float> %tmp10 +} + ; Function Attrs: nounwind declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 -- 2.34.1