From 0e55fd61ae9ab88cf76b30f7e69d168bd7be87d0 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Thu, 30 Sep 2010 01:08:25 +0000 Subject: [PATCH] ARM instruction itinerary fixes: 1. Cortex-a9 8-bit and 16-bit loads / stores AGU cycles are 1 cycle longer than 32-bit ones. 2. Cortex-a9 is out-of-order so model all read cycles as cycle 1. 3. Lots of other random fixes for A8 and A9. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@115121 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 108 +++++++-------- lib/Target/ARM/ARMInstrThumb.td | 40 +++--- lib/Target/ARM/ARMInstrThumb2.td | 190 ++++++++++++++------------- lib/Target/ARM/ARMSchedule.td | 42 ++++-- lib/Target/ARM/ARMScheduleA8.td | 150 ++++++++++++++------- lib/Target/ARM/ARMScheduleA9.td | 217 +++++++++++++++++++------------ lib/Target/ARM/ARMScheduleV6.td | 43 ++++-- 7 files changed, 475 insertions(+), 315 deletions(-) diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 97ac233fe6c..d8bc047b240 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -850,36 +850,36 @@ def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), let AddedComplexity = 10 in { def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p\t$dst, $addr", + Pseudo, IIC_iLoad_r, "\n${addr:label}:\n\tldr$p\t$dst, $addr", [(set GPR:$dst, (load addrmodepc:$addr))]>; def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrh${p}\t$dst, $addr", + Pseudo, IIC_iLoad_bh_r, "\n${addr:label}:\n\tldrh${p}\t$dst, $addr", [(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>; def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrb${p}\t$dst, $addr", + Pseudo, IIC_iLoad_bh_r, "\n${addr:label}:\n\tldrb${p}\t$dst, $addr", [(set GPR:$dst, (zextloadi8 
addrmodepc:$addr))]>; def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsh${p}\t$dst, $addr", + Pseudo, IIC_iLoad_bh_r, "\n${addr:label}:\n\tldrsh${p}\t$dst, $addr", [(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>; def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsb${p}\t$dst, $addr", + Pseudo, IIC_iLoad_bh_r, "\n${addr:label}:\n\tldrsb${p}\t$dst, $addr", [(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>; } let AddedComplexity = 10 in { def PICSTR : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr$p\t$src, $addr", + Pseudo, IIC_iStore_r, "\n${addr:label}:\n\tstr$p\t$src, $addr", [(store GPR:$src, addrmodepc:$addr)]>; def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrh${p}\t$src, $addr", + Pseudo, IIC_iStore_bh_r, "\n${addr:label}:\n\tstrh${p}\t$src, $addr", [(truncstorei16 GPR:$src, addrmodepc:$addr)]>; def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrb${p}\t$src, $addr", + Pseudo, IIC_iStore_bh_r, "\n${addr:label}:\n\tstrb${p}\t$src, $addr", [(truncstorei8 GPR:$src, addrmodepc:$addr)]>; } } // isNotDuplicable = 1 @@ -1235,90 +1235,90 @@ def RFE : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base), // Load let canFoldAsLoad = 1, isReMaterializable = 1 in -def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, +def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoad_r, "ldr", "\t$dst, $addr", [(set GPR:$dst, (load addrmode2:$addr))]>; // Special LDR for loads from non-pc-relative constpools. 
let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1, isReMaterializable = 1 in -def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, +def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoad_r, "ldr", "\t$dst, $addr", []>; // Loads with zero extension def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldrh", "\t$dst, $addr", + IIC_iLoad_bh_r, "ldrh", "\t$dst, $addr", [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>; def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, - IIC_iLoadr, "ldrb", "\t$dst, $addr", + IIC_iLoad_bh_r, "ldrb", "\t$dst, $addr", [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>; // Loads with sign extension def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldrsh", "\t$dst, $addr", + IIC_iLoad_bh_r, "ldrsh", "\t$dst, $addr", [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>; def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldrsb", "\t$dst, $addr", + IIC_iLoad_bh_r, "ldrsb", "\t$dst, $addr", [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldrd", "\t$dst1, $addr", + IIC_iLoad_d_r, "ldrd", "\t$dst1, $addr", []>, Requires<[IsARM, HasV5TE]>; // Indexed loads def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb), - (ins addrmode2:$addr), LdFrm, IIC_iLoadru, + (ins addrmode2:$addr), LdFrm, IIC_iLoad_ru, "ldr", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru, + (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoad_ru, "ldr", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb), - (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, + (ins 
addrmode3:$addr), LdMiscFrm, IIC_iLoad_bh_ru, "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoad_bh_ru, "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb), - (ins addrmode2:$addr), LdFrm, IIC_iLoadru, + (ins addrmode2:$addr), LdFrm, IIC_iLoad_bh_ru, "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru, + (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoad_bh_ru, "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb), - (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, + (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_bh_ru, "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoad_bh_ru, "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb), - (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, + (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_bh_ru, "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoad_bh_ru, "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; // For disassembly only def LDRD_PRE : AI3lddpr<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb), - (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr, + (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_d_ru, "ldrd", "\t$dst1, $dst2, $addr!", "$addr.base = $base_wb", []>, Requires<[IsARM, HasV5TE]>; // For 
disassembly only def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb), - (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadr, + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoad_d_ru, "ldrd", "\t$dst1, $dst2, [$base], $offset", "$base = $base_wb", []>, Requires<[IsARM, HasV5TE]>; @@ -1327,94 +1327,94 @@ def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb), // LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only. def LDRT : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru, + (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoad_ru, "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { let Inst{21} = 1; // overwrite } def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru, + (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoad_bh_ru, "ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { let Inst{21} = 1; // overwrite } def LDRSBT : AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoad_bh_ru, "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { let Inst{21} = 1; // overwrite } def LDRHT : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am3offset:$offset), LdMiscFrm, IIC_iLoadru, + (ins GPR:$base, am3offset:$offset), LdMiscFrm, IIC_iLoad_bh_ru, "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { let Inst{21} = 1; // overwrite } def LDRSHT : AI3ldshpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoad_bh_ru, "ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { let Inst{21} = 1; // overwrite } // Store -def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, +def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStore_r, "str", 
"\t$src, $addr", [(store GPR:$src, addrmode2:$addr)]>; // Stores with truncate def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, - IIC_iStorer, "strh", "\t$src, $addr", + IIC_iStore_bh_r, "strh", "\t$src, $addr", [(truncstorei16 GPR:$src, addrmode3:$addr)]>; -def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, - "strb", "\t$src, $addr", +def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, + IIC_iStore_bh_r, "strb", "\t$src, $addr", [(truncstorei8 GPR:$src, addrmode2:$addr)]>; // Store doubleword let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), - StMiscFrm, IIC_iStorer, + StMiscFrm, IIC_iStore_d_r, "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>; // Indexed stores def STR_PRE : AI2stwpr<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, am2offset:$offset), - StFrm, IIC_iStoreru, + StFrm, IIC_iStore_ru, "str", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>; def STR_POST : AI2stwpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am2offset:$offset), - StFrm, IIC_iStoreru, + StFrm, IIC_iStore_ru, "str", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_store GPR:$src, GPR:$base, am2offset:$offset))]>; def STRH_PRE : AI3sthpr<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am3offset:$offset), - StMiscFrm, IIC_iStoreru, + StMiscFrm, IIC_iStore_bh_ru, "strh", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>; def STRH_POST: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am3offset:$offset), - StMiscFrm, IIC_iStoreru, + StMiscFrm, IIC_iStore_bh_ru, "strh", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$src, GPR:$base, am3offset:$offset))]>; def STRB_PRE : AI2stbpr<(outs GPR:$base_wb), (ins GPR:$src, 
GPR:$base,am2offset:$offset), - StFrm, IIC_iStoreru, + StFrm, IIC_iStore_bh_ru, "strb", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; def STRB_POST: AI2stbpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am2offset:$offset), - StFrm, IIC_iStoreru, + StFrm, IIC_iStore_bh_ru, "strb", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; @@ -1422,14 +1422,14 @@ def STRB_POST: AI2stbpo<(outs GPR:$base_wb), // For disassembly only def STRD_PRE : AI3stdpr<(outs GPR:$base_wb), (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), - StMiscFrm, IIC_iStoreru, + StMiscFrm, IIC_iStore_d_ru, "strd", "\t$src1, $src2, [$base, $offset]!", "$base = $base_wb", []>; // For disassembly only def STRD_POST: AI3stdpo<(outs GPR:$base_wb), (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), - StMiscFrm, IIC_iStoreru, + StMiscFrm, IIC_iStore_d_ru, "strd", "\t$src1, $src2, [$base], $offset", "$base = $base_wb", []>; @@ -1437,7 +1437,7 @@ def STRD_POST: AI3stdpo<(outs GPR:$base_wb), def STRT : AI2stwpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am2offset:$offset), - StFrm, IIC_iStoreru, + StFrm, IIC_iStore_ru, "strt", "\t$src, [$base], $offset", "$base = $base_wb", [/* For disassembly only; pattern left blank */]> { let Inst{21} = 1; // overwrite @@ -1445,7 +1445,7 @@ def STRT : AI2stwpo<(outs GPR:$base_wb), def STRBT : AI2stbpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am2offset:$offset), - StFrm, IIC_iStoreru, + StFrm, IIC_iStore_bh_ru, "strbt", "\t$src, [$base], $offset", "$base = $base_wb", [/* For disassembly only; pattern left blank */]> { let Inst{21} = 1; // overwrite @@ -1453,7 +1453,7 @@ def STRBT : AI2stbpo<(outs GPR:$base_wb), def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am3offset:$offset), - StMiscFrm, IIC_iStoreru, + StMiscFrm, IIC_iStore_bh_ru, "strht", "\t$src, [$base], $offset", "$base 
= $base_wb", [/* For disassembly only; pattern left blank */]> { let Inst{21} = 1; // overwrite @@ -1616,7 +1616,7 @@ defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">; def SBFX : I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), - AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iBITi, + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, "sbfx", "\t$dst, $src, $lsb, $width", "", []>, Requires<[IsARM, HasV6T2]> { let Inst{27-21} = 0b0111101; @@ -1625,7 +1625,7 @@ def SBFX : I<(outs GPR:$dst), def UBFX : I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), - AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iBITi, + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, "ubfx", "\t$dst, $src, $lsb, $width", "", []>, Requires<[IsARM, HasV6T2]> { let Inst{27-21} = 0b0111111; @@ -2024,7 +2024,7 @@ def SMMLSR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), multiclass AI_smul { def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b", + IIC_iMUL16, !strconcat(opc, "bb"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16)))]>, Requires<[IsARM, HasV5TE]> { @@ -2033,7 +2033,7 @@ multiclass AI_smul { } def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMUL32, !strconcat(opc, "bt"), "\t$dst, $a, $b", + IIC_iMUL16, !strconcat(opc, "bt"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -2042,7 +2042,7 @@ multiclass AI_smul { } def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMUL32, !strconcat(opc, "tb"), "\t$dst, $a, $b", + IIC_iMUL16, !strconcat(opc, "tb"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16)))]>, Requires<[IsARM, HasV5TE]> { @@ -2051,7 +2051,7 @@ multiclass AI_smul { } def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), 
- IIC_iMUL32, !strconcat(opc, "tt"), "\t$dst, $a, $b", + IIC_iMUL16, !strconcat(opc, "tt"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16))))]>, Requires<[IsARM, HasV5TE]> { diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 15280139bcb..7303046433e 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -423,47 +423,47 @@ def tTRAP : TI<(outs), (ins), IIC_Br, // let canFoldAsLoad = 1, isReMaterializable = 1 in -def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, +def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoad_r, "ldr", "\t$dst, $addr", [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>, T1LdSt<0b100>; -def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, +def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoad_r, "ldr", "\t$dst, $addr", []>, T1LdSt4Imm<{1,?,?}>; -def tLDRB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr, +def tLDRB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoad_bh_r, "ldrb", "\t$dst, $addr", [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>, T1LdSt<0b110>; -def tLDRBi: T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr, +def tLDRBi: T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoad_bh_r, "ldrb", "\t$dst, $addr", []>, T1LdSt1Imm<{1,?,?}>; -def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr, +def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoad_bh_r, "ldrh", "\t$dst, $addr", [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>, T1LdSt<0b101>; -def tLDRHi: T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr, +def tLDRHi: T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoad_bh_r, "ldrh", "\t$dst, $addr", []>, T1LdSt2Imm<{1,?,?}>; let AddedComplexity = 10 in -def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr, +def tLDRSB : 
T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoad_bh_r, "ldrsb", "\t$dst, $addr", [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>, T1LdSt<0b011>; let AddedComplexity = 10 in -def tLDRSH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr, +def tLDRSH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoad_bh_r, "ldrsh", "\t$dst, $addr", [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>, T1LdSt<0b111>; let canFoldAsLoad = 1 in -def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, +def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoad_i, "ldr", "\t$dst, $addr", [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>, T1LdStSP<{1,?,?}>; @@ -471,14 +471,14 @@ def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, // Special instruction for restore. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in -def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, +def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoad_i, "ldr", "\t$dst, $addr", []>, T1LdStSP<{1,?,?}>; // Load tconstpool // FIXME: Use ldr.n to work around a Darwin assembler bug. let canFoldAsLoad = 1, isReMaterializable = 1 in -def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, +def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoad_i, "ldr", ".n\t$dst, $addr", [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>, T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59 @@ -486,38 +486,38 @@ def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, // Special LDR for loads from non-pc-relative constpools. 
let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1, isReMaterializable = 1 in -def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, +def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoad_i, "ldr", "\t$dst, $addr", []>, T1LdStSP<{1,?,?}>; -def tSTR : T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer, +def tSTR : T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStore_r, "str", "\t$src, $addr", [(store tGPR:$src, t_addrmode_s4:$addr)]>, T1LdSt<0b000>; -def tSTRi: T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer, +def tSTRi: T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStore_r, "str", "\t$src, $addr", []>, T1LdSt4Imm<{0,?,?}>; -def tSTRB : T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer, +def tSTRB : T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStore_bh_r, "strb", "\t$src, $addr", [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>, T1LdSt<0b010>; -def tSTRBi: T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer, +def tSTRBi: T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStore_bh_r, "strb", "\t$src, $addr", []>, T1LdSt1Imm<{0,?,?}>; -def tSTRH : T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer, +def tSTRH : T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStore_bh_r, "strh", "\t$src, $addr", [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>, T1LdSt<0b001>; -def tSTRHi: T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer, +def tSTRHi: T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStore_bh_r, "strh", "\t$src, $addr", []>, T1LdSt2Imm<{0,?,?}>; -def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, +def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStore_i, "str", "\t$src, $addr", [(store tGPR:$src, t_addrmode_sp:$addr)]>, T1LdStSP<{0,?,?}>; @@ -525,7 +525,7 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, let mayStore 
= 1, neverHasSideEffects = 1 in { // Special instruction for spill. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). -def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, +def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStore_i, "str", "\t$src, $addr", []>, T1LdStSP<{0,?,?}>; } diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 25eca70d38f..66d1329e257 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -573,8 +573,9 @@ multiclass T2I_cmp_irs opcod, string opc, } /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns. -multiclass T2I_ld opcod, string opc, PatFrag opnode> { - def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), IIC_iLoadi, +multiclass T2I_ld opcod, string opc, + InstrItinClass iii, InstrItinClass iir, PatFrag opnode> { + def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), iii, opc, ".w\t$dst, $addr", [(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]> { let Inst{31-27} = 0b11111; @@ -584,7 +585,7 @@ multiclass T2I_ld opcod, string opc, PatFrag opnode> { let Inst{22-21} = opcod; let Inst{20} = 1; // load } - def i8 : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, + def i8 : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr), iii, opc, "\t$dst, $addr", [(set GPR:$dst, (opnode t2addrmode_imm8:$addr))]> { let Inst{31-27} = 0b11111; @@ -598,7 +599,7 @@ multiclass T2I_ld opcod, string opc, PatFrag opnode> { let Inst{10} = 1; // The P bit. let Inst{8} = 0; // The W bit. 
} - def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), IIC_iLoadr, + def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), iir, opc, ".w\t$dst, $addr", [(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]> { let Inst{31-27} = 0b11111; @@ -609,7 +610,7 @@ multiclass T2I_ld opcod, string opc, PatFrag opnode> { let Inst{20} = 1; // load let Inst{11-6} = 0b000000; } - def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), IIC_iLoadi, + def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), iii, opc, ".w\t$dst, $addr", [(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]> { let isReMaterializable = 1; @@ -624,8 +625,9 @@ multiclass T2I_ld opcod, string opc, PatFrag opnode> { } /// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns. -multiclass T2I_st opcod, string opc, PatFrag opnode> { - def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), IIC_iStorei, +multiclass T2I_st opcod, string opc, + InstrItinClass iii, InstrItinClass iir, PatFrag opnode> { + def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), iii, opc, ".w\t$src, $addr", [(opnode GPR:$src, t2addrmode_imm12:$addr)]> { let Inst{31-27} = 0b11111; @@ -633,7 +635,7 @@ multiclass T2I_st opcod, string opc, PatFrag opnode> { let Inst{22-21} = opcod; let Inst{20} = 0; // !load } - def i8 : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr), IIC_iStorei, + def i8 : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr), iii, opc, "\t$src, $addr", [(opnode GPR:$src, t2addrmode_imm8:$addr)]> { let Inst{31-27} = 0b11111; @@ -645,7 +647,7 @@ multiclass T2I_st opcod, string opc, PatFrag opnode> { let Inst{10} = 1; // The P bit. let Inst{8} = 0; // The W bit. 
} - def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), IIC_iStorer, + def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), iir, opc, ".w\t$src, $addr", [(opnode GPR:$src, t2addrmode_so_reg:$addr)]> { let Inst{31-27} = 0b11111; @@ -656,10 +658,10 @@ multiclass T2I_st opcod, string opc, PatFrag opnode> { } } -/// T2I_unary_rrot - A unary operation with two forms: one whose operand is a +/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. -multiclass T2I_unary_rrot opcod, string opc, PatFrag opnode> { - def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, +multiclass T2I_ext_rrot opcod, string opc, PatFrag opnode> { + def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iEXTr, opc, ".w\t$dst, $src", [(set rGPR:$dst, (opnode rGPR:$src))]> { let Inst{31-27} = 0b11111; @@ -670,7 +672,7 @@ multiclass T2I_unary_rrot opcod, string opc, PatFrag opnode> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi, + def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iEXTr, opc, ".w\t$dst, $src, ror $rot", [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]> { let Inst{31-27} = 0b11111; @@ -684,8 +686,8 @@ multiclass T2I_unary_rrot opcod, string opc, PatFrag opnode> { } // UXTB16 - Requres T2ExtractPack, does not need the .w qualifier. 
-multiclass T2I_unary_rrot_uxtb16 opcod, string opc, PatFrag opnode> { - def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, +multiclass T2I_ext_rrot_uxtb16 opcod, string opc, PatFrag opnode> { + def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iEXTr, opc, "\t$dst, $src", [(set rGPR:$dst, (opnode rGPR:$src))]>, Requires<[HasT2ExtractPack]> { @@ -697,7 +699,7 @@ multiclass T2I_unary_rrot_uxtb16 opcod, string opc, PatFrag opnode> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi, + def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iEXTr, opc, "\t$dst, $src, ror $rot", [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]>, Requires<[HasT2ExtractPack]> { @@ -713,8 +715,8 @@ multiclass T2I_unary_rrot_uxtb16 opcod, string opc, PatFrag opnode> { // SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern // supported yet. -multiclass T2I_unary_rrot_sxtb16 opcod, string opc> { - def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, +multiclass T2I_ext_rrot_sxtb16 opcod, string opc> { + def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iEXTr, opc, "\t$dst, $src", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -724,7 +726,7 @@ multiclass T2I_unary_rrot_sxtb16 opcod, string opc> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi, + def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iEXTr, opc, "\t$dst, $src, ror $rot", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -736,10 +738,10 @@ multiclass T2I_unary_rrot_sxtb16 opcod, string opc> { } } -/// T2I_bin_rrot - A binary operation with two forms: one whose operand is a +/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. 
-multiclass T2I_bin_rrot opcod, string opc, PatFrag opnode> { - def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr, +multiclass T2I_exta_rrot opcod, string opc, PatFrag opnode> { + def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iEXTAr, opc, "\t$dst, $LHS, $RHS", [(set rGPR:$dst, (opnode rGPR:$LHS, rGPR:$RHS))]>, Requires<[HasT2ExtractPack]> { @@ -751,7 +753,7 @@ multiclass T2I_bin_rrot opcod, string opc, PatFrag opnode> { let Inst{5-4} = 0b00; // rotate } def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot), - IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", + IIC_iEXTAsr, opc, "\t$dst, $LHS, $RHS, ror $rot", [(set rGPR:$dst, (opnode rGPR:$LHS, (rotr rGPR:$RHS, rot_imm:$rot)))]>, Requires<[HasT2ExtractPack]> { @@ -766,7 +768,7 @@ multiclass T2I_bin_rrot opcod, string opc, PatFrag opnode> { // DO variant - disassembly only, no pattern -multiclass T2I_bin_rrot_DO opcod, string opc> { +multiclass T2I_exta_rrot_DO opcod, string opc> { def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iEXTAr, opc, "\t$dst, $LHS, $RHS", []> { let Inst{31-27} = 0b11111; @@ -914,23 +916,28 @@ def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi, // Load let canFoldAsLoad = 1, isReMaterializable = 1 in -defm t2LDR : T2I_ld<0, 0b10, "ldr", UnOpFrag<(load node:$Src)>>; +defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_r, + UnOpFrag<(load node:$Src)>>; // Loads with zero extension -defm t2LDRH : T2I_ld<0, 0b01, "ldrh", UnOpFrag<(zextloadi16 node:$Src)>>; -defm t2LDRB : T2I_ld<0, 0b00, "ldrb", UnOpFrag<(zextloadi8 node:$Src)>>; +defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_r, + UnOpFrag<(zextloadi16 node:$Src)>>; +defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_r, + UnOpFrag<(zextloadi8 node:$Src)>>; // Loads with sign extension -defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>; -defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", 
UnOpFrag<(sextloadi8 node:$Src)>>; +defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_r, + UnOpFrag<(sextloadi16 node:$Src)>>; +defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_r, + UnOpFrag<(sextloadi8 node:$Src)>>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2), (ins t2addrmode_imm8s4:$addr), - IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>; + IIC_iLoad_d_i, "ldrd", "\t$dst1, $addr", []>; def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2), - (ins i32imm:$addr), IIC_iLoadi, + (ins i32imm:$addr), IIC_iLoad_d_i, "ldrd", "\t$dst1, $addr", []> { let Inst{19-16} = 0b1111; // Rn } @@ -985,57 +992,57 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), let mayLoad = 1, neverHasSideEffects = 1 in { def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), - AddrModeT2_i8, IndexModePre, IIC_iLoadiu, + AddrModeT2_i8, IndexModePre, IIC_iLoad_iu, "ldr", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, IIC_iLoadiu, + AddrModeT2_i8, IndexModePost, IIC_iLoad_iu, "ldr", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, (outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), - AddrModeT2_i8, IndexModePre, IIC_iLoadiu, + AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, IIC_iLoadiu, + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$dst, GPR:$base_wb), (ins 
t2addrmode_imm8:$addr), - AddrModeT2_i8, IndexModePre, IIC_iLoadiu, + AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, IIC_iLoadiu, + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), - AddrModeT2_i8, IndexModePre, IIC_iLoadiu, + AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, IIC_iLoadiu, + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), - AddrModeT2_i8, IndexModePre, IIC_iLoadiu, + AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, IIC_iLoadiu, + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", []>; } // mayLoad = 1, neverHasSideEffects = 1 @@ -1043,8 +1050,8 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb), // LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are // for disassembly only. 
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 -class T2IldT type, string opc> - : T2Ii8<(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc, +class T2IldT type, string opc, InstrItinClass ii> + : T2Ii8<(outs GPR:$dst), (ins t2addrmode_imm8:$addr), ii, opc, "\t$dst, $addr", []> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; @@ -1056,62 +1063,65 @@ class T2IldT type, string opc> let Inst{10-8} = 0b110; // PUW. } -def t2LDRT : T2IldT<0, 0b10, "ldrt">; -def t2LDRBT : T2IldT<0, 0b00, "ldrbt">; -def t2LDRHT : T2IldT<0, 0b01, "ldrht">; -def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt">; -def t2LDRSHT : T2IldT<1, 0b01, "ldrsht">; +def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>; +def t2LDRBT : T2IldT<0, 0b00, "ldrbt", IIC_iLoad_bh_i>; +def t2LDRHT : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>; +def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>; +def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>; // Store -defm t2STR :T2I_st<0b10,"str", BinOpFrag<(store node:$LHS, node:$RHS)>>; -defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; -defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; +defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_r, + BinOpFrag<(store node:$LHS, node:$RHS)>>; +defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_r, + BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; +defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_r, + BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), (ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr), - IIC_iStorer, "strd", "\t$src1, $addr", []>; + IIC_iStore_d_r, "strd", "\t$src1, $addr", []>; // Indexed stores def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePre, IIC_iStoreiu, + AddrModeT2_i8, 
IndexModePre, IIC_iStore_iu, "str", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, IIC_iStoreiu, + AddrModeT2_i8, IndexModePost, IIC_iStore_iu, "str", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePre, IIC_iStoreiu, + AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "strh", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, IIC_iStoreiu, + AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, "strh", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePre, IIC_iStoreiu, + AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, "strb", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, IIC_iStoreiu, + AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, "strb", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; @@ -1119,8 +1129,8 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 
0, (outs GPR:$base_wb), // STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly // only. // Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 -class T2IstT type, string opc> - : T2Ii8<(outs GPR:$src), (ins t2addrmode_imm8:$addr), IIC_iStorei, opc, +class T2IstT type, string opc, InstrItinClass ii> + : T2Ii8<(outs GPR:$src), (ins t2addrmode_imm8:$addr), ii, opc, "\t$src, $addr", []> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; @@ -1132,28 +1142,28 @@ class T2IstT type, string opc> let Inst{10-8} = 0b110; // PUW } -def t2STRT : T2IstT<0b10, "strt">; -def t2STRBT : T2IstT<0b00, "strbt">; -def t2STRHT : T2IstT<0b01, "strht">; +def t2STRT : T2IstT<0b10, "strt", IIC_iStore_i>; +def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>; +def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; // ldrd / strd pre / post variants // For disassembly only. def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$base, t2am_imm8s4_offset:$imm), NoItinerary, + (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, "ldrd", "\t$dst1, $dst2, [$base, $imm]!", []>; def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$base, t2am_imm8s4_offset:$imm), NoItinerary, + (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, "ldrd", "\t$dst1, $dst2, [$base], $imm", []>; def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs), (ins GPR:$src1, GPR:$src2, GPR:$base, t2am_imm8s4_offset:$imm), - NoItinerary, "strd", "\t$src1, $src2, [$base, $imm]!", []>; + IIC_iStore_d_ru, "strd", "\t$src1, $src2, [$base, $imm]!", []>; def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs), (ins GPR:$src1, GPR:$src2, GPR:$base, t2am_imm8s4_offset:$imm), - NoItinerary, "strd", "\t$src1, $src2, [$base], $imm", []>; + IIC_iStore_d_ru, "strd", "\t$src1, $src2, [$base], $imm", []>; // T2Ipl (Preload Data/Instruction) signals the memory system of possible future // data/instruction access. These are for disassembly only. 
@@ -1162,7 +1172,7 @@ def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs), // The neg_zero operand translates -0 to -1, -1 to -2, ..., etc. multiclass T2Ipl { - def i12 : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoadi, opc, + def i12 : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoad_i, opc, "\t[$base, $imm]", []> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; @@ -1173,7 +1183,7 @@ multiclass T2Ipl { let Inst{15-12} = 0b1111; } - def i8 : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoadi, opc, + def i8 : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoad_i, opc, "\t[$base, $imm]", []> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; @@ -1185,7 +1195,7 @@ multiclass T2Ipl { let Inst{11-8} = 0b1100; } - def pci : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoadi, opc, + def pci : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoad_i, opc, "\t[pc, $imm]", []> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; @@ -1197,7 +1207,7 @@ multiclass T2Ipl { let Inst{15-12} = 0b1111; } - def r : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoadi, opc, + def r : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoad_i, opc, "\t[$base, $a]", []> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; @@ -1210,7 +1220,7 @@ multiclass T2Ipl { let Inst{5-4} = 0b00; // no shift is applied } - def s : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoadi, opc, + def s : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoad_i, opc, "\t[$base, $a, lsl $shamt]", []> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; @@ -1342,28 +1352,28 @@ def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>; // Sign extenders -defm t2SXTB : T2I_unary_rrot<0b100, "sxtb", +defm t2SXTB : T2I_ext_rrot<0b100, "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; -defm t2SXTH : T2I_unary_rrot<0b000, "sxth", +defm t2SXTH : T2I_ext_rrot<0b000, "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; -defm t2SXTB16 : T2I_unary_rrot_sxtb16<0b010, "sxtb16">; +defm t2SXTB16 : 
T2I_ext_rrot_sxtb16<0b010, "sxtb16">; -defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab", +defm t2SXTAB : T2I_exta_rrot<0b100, "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; -defm t2SXTAH : T2I_bin_rrot<0b000, "sxtah", +defm t2SXTAH : T2I_exta_rrot<0b000, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; -defm t2SXTAB16 : T2I_bin_rrot_DO<0b010, "sxtab16">; +defm t2SXTAB16 : T2I_exta_rrot_DO<0b010, "sxtab16">; // TODO: SXT(A){B|H}16 - done for disassembly only // Zero extenders let AddedComplexity = 16 in { -defm t2UXTB : T2I_unary_rrot<0b101, "uxtb", +defm t2UXTB : T2I_ext_rrot<0b101, "uxtb", UnOpFrag<(and node:$Src, 0x000000FF)>>; -defm t2UXTH : T2I_unary_rrot<0b001, "uxth", +defm t2UXTH : T2I_ext_rrot<0b001, "uxth", UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16", +defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; // FIXME: This pattern incorrectly assumes the shl operator is a rotate. 
@@ -1375,11 +1385,11 @@ defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16", def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF), (t2UXTB16r_rot rGPR:$Src, 8)>, Requires<[HasT2ExtractPack]>; -defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab", +defm t2UXTAB : T2I_exta_rrot<0b101, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; -defm t2UXTAH : T2I_bin_rrot<0b001, "uxtah", +defm t2UXTAH : T2I_exta_rrot<0b001, "uxtah", BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; -defm t2UXTAB16 : T2I_bin_rrot_DO<0b011, "uxtab16">; +defm t2UXTAB16 : T2I_exta_rrot_DO<0b011, "uxtab16">; } //===----------------------------------------------------------------------===// @@ -1663,7 +1673,7 @@ def t2BFC : T2I<(outs rGPR:$dst), (ins rGPR:$src, bf_inv_mask_imm:$imm), } def t2SBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width), - IIC_iBITi, "sbfx", "\t$dst, $src, $lsb, $width", []> { + IIC_iUNAsi, "sbfx", "\t$dst, $src, $lsb, $width", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b10100; @@ -1671,7 +1681,7 @@ def t2SBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width), } def t2UBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width), - IIC_iBITi, "ubfx", "\t$dst, $src, $lsb, $width", []> { + IIC_iUNAsi, "ubfx", "\t$dst, $src, $lsb, $width", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b11100; @@ -1860,7 +1870,7 @@ def t2SMMLSR:T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, } multiclass T2I_smul { - def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, + def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16, !strconcat(opc, "bb"), "\t$dst, $a, $b", [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16), (sext_inreg rGPR:$b, i16)))]> { @@ -1872,7 +1882,7 @@ multiclass T2I_smul { let Inst{5-4} = 0b00; } - def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, + def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), 
IIC_iMUL16, !strconcat(opc, "bt"), "\t$dst, $a, $b", [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16), (sra rGPR:$b, (i32 16))))]> { @@ -1884,7 +1894,7 @@ multiclass T2I_smul { let Inst{5-4} = 0b01; } - def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, + def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16, !strconcat(opc, "tb"), "\t$dst, $a, $b", [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)), (sext_inreg rGPR:$b, i16)))]> { @@ -1896,7 +1906,7 @@ multiclass T2I_smul { let Inst{5-4} = 0b10; } - def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, + def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16, !strconcat(opc, "tt"), "\t$dst, $a, $b", [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)), (sra rGPR:$b, (i32 16))))]> { diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 00d148b8eda..07bd0fdf287 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -52,21 +52,39 @@ def IIC_iMUL32 : InstrItinClass; def IIC_iMAC32 : InstrItinClass; def IIC_iMUL64 : InstrItinClass; def IIC_iMAC64 : InstrItinClass; -def IIC_iLoadi : InstrItinClass; -def IIC_iLoadr : InstrItinClass; -def IIC_iLoadsi : InstrItinClass; -def IIC_iLoadiu : InstrItinClass; -def IIC_iLoadru : InstrItinClass; -def IIC_iLoadsiu : InstrItinClass; +def IIC_iLoad_i : InstrItinClass; +def IIC_iLoad_r : InstrItinClass; +def IIC_iLoad_si : InstrItinClass; +def IIC_iLoad_iu : InstrItinClass; +def IIC_iLoad_ru : InstrItinClass; +def IIC_iLoad_siu : InstrItinClass; +def IIC_iLoad_bh_i : InstrItinClass; +def IIC_iLoad_bh_r : InstrItinClass; +def IIC_iLoad_bh_si : InstrItinClass; +def IIC_iLoad_bh_iu : InstrItinClass; +def IIC_iLoad_bh_ru : InstrItinClass; +def IIC_iLoad_bh_siu : InstrItinClass; +def IIC_iLoad_d_i : InstrItinClass; +def IIC_iLoad_d_r : InstrItinClass; +def IIC_iLoad_d_ru : InstrItinClass; def IIC_iLoadm : InstrItinClass<0>; // micro-coded def IIC_iLoadmBr : InstrItinClass<0>; // 
micro-coded def IIC_iLoadiALU : InstrItinClass; -def IIC_iStorei : InstrItinClass; -def IIC_iStorer : InstrItinClass; -def IIC_iStoresi : InstrItinClass; -def IIC_iStoreiu : InstrItinClass; -def IIC_iStoreru : InstrItinClass; -def IIC_iStoresiu : InstrItinClass; +def IIC_iStore_i : InstrItinClass; +def IIC_iStore_r : InstrItinClass; +def IIC_iStore_si : InstrItinClass; +def IIC_iStore_iu : InstrItinClass; +def IIC_iStore_ru : InstrItinClass; +def IIC_iStore_siu : InstrItinClass; +def IIC_iStore_bh_i : InstrItinClass; +def IIC_iStore_bh_r : InstrItinClass; +def IIC_iStore_bh_si : InstrItinClass; +def IIC_iStore_bh_iu : InstrItinClass; +def IIC_iStore_bh_ru : InstrItinClass; +def IIC_iStore_bh_siu : InstrItinClass; +def IIC_iStore_d_i : InstrItinClass; +def IIC_iStore_d_r : InstrItinClass; +def IIC_iStore_d_ru : InstrItinClass; def IIC_iStorem : InstrItinClass<0>; // micro-coded def IIC_Br : InstrItinClass; def IIC_fpSTAT : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 8962ec93efa..ff2a673c26b 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -108,38 +108,69 @@ def CortexA8Itineraries : ProcessorItineraries< // use A8_Issue to enforce the 1 load/store per cycle limit // // Immediate offset - InstrItinData, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, // // Register offset - InstrItinData, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + 
InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData, - InstrStage<1, [A8_Pipe0], 0>, - InstrStage<1, [A8_Pipe1]>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>, // // Immediate offset with update - InstrItinData, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>, // // Register offset with update - InstrItinData, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData, - InstrStage<1, [A8_Pipe0], 0>, - InstrStage<1, [A8_Pipe1]>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>, // // Load multiple InstrItinData, @@ -170,38 +201,69 @@ 
def CortexA8Itineraries : ProcessorItineraries< // use A8_Issue to enforce the 1 load/store per cycle limit // // Immediate offset - InstrItinData, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, // // Register offset - InstrItinData, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData, - InstrStage<1, [A8_Pipe0], 0>, - InstrStage<1, [A8_Pipe1]>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, // // Immediate offset with update - InstrItinData, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>, // // Register offset with update - InstrItinData, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, 
[A8_LdSt0]>], [2, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData, - InstrStage<1, [A8_Pipe0], 0>, - InstrStage<1, [A8_Pipe1]>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>, // // Store multiple InstrItinData, diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 1f4b8d1ab07..e17d6bb9610 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -18,7 +18,7 @@ // Functional units def A9_Pipe0 : FuncUnit; // pipeline 0 def A9_Pipe1 : FuncUnit; // pipeline 1 -def A9_LSPipe : FuncUnit; // LS pipe +def A9_AGU : FuncUnit; // LS pipe def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe def A9_DRegsVFP: FuncUnit; // FP register set, VFP side def A9_DRegsN : FuncUnit; // FP register set, NEON side @@ -29,7 +29,7 @@ def A9_LdBypass : Bypass; // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1 // def CortexA9Itineraries : ProcessorItineraries< - [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], + [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_AGU, A9_Pipe0, A9_Pipe1], [A9_LdBypass], [ // Two fully-pipelined integer ALU pipelines @@ -38,7 +38,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData], [1]>, InstrItinData], [1, 1]>, InstrItinData], [1, 1]>, - InstrItinData], [2, 2, 1]>, + InstrItinData], [2, 1, 1]>, InstrItinData, InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>, // @@ -47,158 +47,211 @@ def 
CortexA9Itineraries : ProcessorItineraries< [1]>, InstrItinData], [1, 1], [NoBypass, A9_LdBypass]>, - InstrItinData], - [1, 1]>, - InstrItinData], - [2, 2, 1]>, + InstrItinData], + [2, 1]>, + InstrItinData], + [3, 1, 1]>, // // No operand cycles - InstrItinData]>, + InstrItinData]>, // // Binary Instructions that produce a result InstrItinData], - [2, 2], [NoBypass, A9_LdBypass]>, + [1, 1], [NoBypass, A9_LdBypass]>, InstrItinData], - [2, 2, 2], [NoBypass, A9_LdBypass, A9_LdBypass]>, + [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>, InstrItinData], - [2, 2, 1], [NoBypass, A9_LdBypass, NoBypass]>, + [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>, InstrItinData], - [2, 1, 2], [NoBypass, NoBypass, A9_LdBypass]>, + [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>, InstrItinData], - [2, 2, 1, 1], + [3, 1, 1, 1], [NoBypass, A9_LdBypass, NoBypass, NoBypass]>, // // Bitwise Instructions that produce a result - InstrItinData], [2, 2]>, - InstrItinData], [2, 2, 2]>, - InstrItinData], [2, 2, 1]>, - InstrItinData], [2, 2, 1, 1]>, + InstrItinData], [1, 1]>, + InstrItinData], [1, 1, 1]>, + InstrItinData], [2, 1, 1]>, + InstrItinData], [3, 1, 1, 1]>, // // Unary Instructions that produce a result - InstrItinData], [2, 2]>, + + // CLZ, RBIT, etc. 
+ InstrItinData], [1, 1]>, + + // BFC, BFI, UBFX, SBFX InstrItinData], [2, 1]>, + // // Zero and sign extension instructions InstrItinData], [2, 1]>, InstrItinData], [3, 1, 1]>, - InstrItinData],[3, 1, 1, 1]>, + InstrItinData],[3, 1, 1, 1]>, // // Compare instructions InstrItinData], - [2], [A9_LdBypass]>, + [1], [A9_LdBypass]>, InstrItinData], - [2, 2], [A9_LdBypass, A9_LdBypass]>, + [1, 1], [A9_LdBypass, A9_LdBypass]>, InstrItinData], - [2, 1], [A9_LdBypass, NoBypass]>, + [1, 1], [A9_LdBypass, NoBypass]>, InstrItinData], - [2, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>, + [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>, // // Test instructions - InstrItinData], [2]>, - InstrItinData], [2, 2]>, - InstrItinData], [2, 1]>, - InstrItinData], [2, 1, 1]>, + InstrItinData], [1]>, + InstrItinData], [1, 1]>, + InstrItinData], [1, 1]>, + InstrItinData], [1, 1, 1]>, // // Move instructions, conditional - InstrItinData], [2]>, - InstrItinData], [2, 1]>, - InstrItinData], [2, 1]>, + // FIXME: Correctly model the extra input dep on the destination. 
+ InstrItinData], [1]>, + InstrItinData], [1, 1]>, + InstrItinData], [1, 1]>, InstrItinData], [2, 1, 1]>, // Integer multiply pipeline // InstrItinData, - InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>, + InstrStage<2, [A9_Pipe0]>], [3, 1, 1]>, InstrItinData, - InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>, + InstrStage<2, [A9_Pipe0]>], [3, 1, 1, 1]>, InstrItinData, InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>, InstrItinData, - InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>, - InstrItinData, + InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 1]>, + InstrItinData, InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>, - InstrItinData, + InstrItinData, InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>, // Integer load pipeline // FIXME: The timings are some rough approximations // // Immediate offset - InstrItinData, - InstrStage<1, [A9_LSPipe]>], - [3, 1], [A9_LdBypass]>, + InstrItinData, + InstrStage<1, [A9_AGU]>], + [3, 1], [A9_LdBypass]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], + [4, 1], [A9_LdBypass]>, + // FIXME: If address is 64-bit aligned, AGU cycles is 1. 
+ InstrItinData, + InstrStage<2, [A9_AGU]>], + [3, 3, 1], [A9_LdBypass]>, // // Register offset - InstrItinData, - InstrStage<1, [A9_LSPipe]>], - [3, 1, 1], [A9_LdBypass]>, + InstrItinData, + InstrStage<1, [A9_AGU]>], + [3, 1, 1], [A9_LdBypass]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], + [4, 1, 1], [A9_LdBypass]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], + [3, 3, 1, 1], [A9_LdBypass]>, // // Scaled register offset - InstrItinData, - InstrStage<2, [A9_LSPipe]>], - [4, 1, 1], [A9_LdBypass]>, + InstrItinData, + InstrStage<1, [A9_AGU]>], + [4, 1, 1], [A9_LdBypass]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], + [5, 1, 1], [A9_LdBypass]>, // // Immediate offset with update - InstrItinData, - InstrStage<2, [A9_LSPipe]>], - [3, 2, 1], [A9_LdBypass]>, + InstrItinData, + InstrStage<1, [A9_AGU]>], + [3, 2, 1], [A9_LdBypass]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], + [4, 3, 1], [A9_LdBypass]>, // // Register offset with update - InstrItinData, - InstrStage<2, [A9_LSPipe]>], - [3, 2, 1, 1], [A9_LdBypass]>, + InstrItinData, + InstrStage<1, [A9_AGU]>], + [3, 2, 1, 1], [A9_LdBypass]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], + [4, 3, 1, 1], [A9_LdBypass]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], + [3, 3, 1, 1], [A9_LdBypass]>, // // Scaled register offset with update - InstrItinData, - InstrStage<2, [A9_LSPipe]>], - [4, 3, 1, 1], [A9_LdBypass]>, + InstrItinData, + InstrStage<1, [A9_AGU]>], + [4, 3, 1, 1], [A9_LdBypass]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], + [5, 4, 1, 1], [A9_LdBypass]>, // // Load multiple InstrItinData, - InstrStage<1, [A9_LSPipe]>], + InstrStage<2, [A9_AGU]>], [3], [A9_LdBypass]>, // // Load multiple plus branch InstrItinData, - InstrStage<1, [A9_LSPipe]>, + InstrStage<1, [A9_AGU]>, InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>, // // iLoadi + iALUr for t2LDRpci_pic. 
InstrItinData, - InstrStage<1, [A9_LSPipe]>, + InstrStage<1, [A9_AGU]>, InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>, // Integer store pipeline /// // Immediate offset - InstrItinData, - InstrStage<1, [A9_LSPipe]>], [3, 1]>, + InstrItinData, + InstrStage<1, [A9_AGU]>], [1, 1]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], [1, 1]>, + // FIXME: If address is 64-bit aligned, AGU cycles is 1. + InstrItinData, + InstrStage<2, [A9_AGU]>], [1, 1]>, // // Register offset - InstrItinData, - InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>, + InstrItinData, + InstrStage<1, [A9_AGU]>], [1, 1, 1]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], [1, 1, 1]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], [1, 1, 1]>, // // Scaled register offset - InstrItinData, - InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>, + InstrItinData, + InstrStage<1, [A9_AGU]>], [1, 1, 1]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], [1, 1, 1]>, // // Immediate offset with update - InstrItinData, - InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>, + InstrItinData, + InstrStage<1, [A9_AGU]>], [2, 1, 1]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], [3, 1, 1]>, // // Register offset with update - InstrItinData, - InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [A9_AGU]>], [2, 1, 1, 1]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], [3, 1, 1, 1]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], [3, 1, 1, 1]>, // // Scaled register offset with update - InstrItinData, - InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [A9_AGU]>], [2, 1, 1, 1]>, + InstrItinData, + InstrStage<2, [A9_AGU]>], [3, 1, 1, 1]>, // // Store multiple InstrItinData, - InstrStage<1, [A9_LSPipe]>]>, + InstrStage<1, [A9_AGU]>]>, // Branch // // no delay slots, so the latency of a branch is unimportant @@ -397,42 +450,42 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_Pipe1], 0>, - InstrStage<1, [A9_LSPipe]>, + InstrStage<1, [A9_AGU]>, 
InstrStage<1, [A9_NPipe]>]>, // // Double-precision FP Load InstrItinData, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_Pipe1], 0>, - InstrStage<1, [A9_LSPipe]>, + InstrStage<1, [A9_AGU]>, InstrStage<1, [A9_NPipe]>]>, // // FP Load Multiple InstrItinData, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_Pipe1], 0>, - InstrStage<1, [A9_LSPipe]>, + InstrStage<1, [A9_AGU]>, InstrStage<1, [A9_NPipe]>]>, // // Single-precision FP Store InstrItinData, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_Pipe1], 0>, - InstrStage<1, [A9_LSPipe]>, + InstrStage<1, [A9_AGU]>, InstrStage<1, [A9_NPipe]>]>, // // Double-precision FP Store InstrItinData, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_Pipe1], 0>, - InstrStage<1, [A9_LSPipe]>, + InstrStage<1, [A9_AGU]>, InstrStage<1, [A9_NPipe]>]>, // // FP Store Multiple InstrItinData, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_Pipe1], 0>, - InstrStage<1, [A9_LSPipe]>, + InstrStage<1, [A9_AGU]>, InstrStage<1, [A9_NPipe]>]>, // NEON // Issue through integer pipeline, and execute in NEON unit. 
@@ -443,7 +496,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, InstrStage<7, [A9_DRegsVFP], 0, Reserved>, InstrStage<1, [A9_Pipe1], 0>, - InstrStage<1, [A9_LSPipe]>, + InstrStage<1, [A9_AGU]>, InstrStage<1, [A9_NPipe]>]>, // // VLD2 @@ -452,7 +505,7 @@ def CortexA9Itineraries : ProcessorItineraries< // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, InstrStage<1, [A9_Pipe1], 0>, - InstrStage<1, [A9_LSPipe]>, + InstrStage<1, [A9_AGU]>, InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, // // VLD3 @@ -461,7 +514,7 @@ def CortexA9Itineraries : ProcessorItineraries< // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, InstrStage<1, [A9_Pipe1], 0>, - InstrStage<1, [A9_LSPipe]>, + InstrStage<1, [A9_AGU]>, InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>, // // VLD4 @@ -470,7 +523,7 @@ def CortexA9Itineraries : ProcessorItineraries< // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, InstrStage<1, [A9_Pipe1], 0>, - InstrStage<1, [A9_LSPipe]>, + InstrStage<1, [A9_AGU]>, InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>, // // VST @@ -479,7 +532,7 @@ def CortexA9Itineraries : ProcessorItineraries< // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, InstrStage<1, [A9_Pipe1], 0>, - InstrStage<1, [A9_LSPipe]>, + InstrStage<1, [A9_AGU]>, InstrStage<1, [A9_NPipe]>]>, // // Double-register Integer Unary @@ -709,7 +762,7 @@ def CortexA9Itineraries : ProcessorItineraries< // FIXME: all latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, InstrStage<1, [A9_Pipe1]>, - InstrStage<1, [A9_LSPipe]>], [2, 1]>, + InstrStage<1, [A9_AGU]>], [2, 1]>, // // Quad-register Permute Move // Result written in N2, but that is relative to the last cycle of multicycle, @@ -845,7 +898,7 @@ def CortexA9Itineraries : ProcessorItineraries< // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, 
Reserved>, InstrStage<1, [A9_Pipe1]>, - InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>, + InstrStage<3, [A9_AGU]>], [4, 4, 1, 1]>, // // Double-register VEXT diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index f19d1d13775..b5ae9277fcf 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -89,22 +89,31 @@ def ARMV6Itineraries : ProcessorItineraries< // Integer load pipeline // // Immediate offset - InstrItinData], [4, 1]>, + InstrItinData], [4, 1]>, + InstrItinData], [4, 1]>, + InstrItinData], [4, 1]>, // // Register offset - InstrItinData], [4, 1, 1]>, + InstrItinData], [4, 1, 1]>, + InstrItinData], [4, 1, 1]>, + InstrItinData], [4, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData], [5, 2, 1]>, + InstrItinData], [5, 2, 1]>, + InstrItinData], [5, 2, 1]>, // // Immediate offset with update - InstrItinData], [4, 2, 1]>, + InstrItinData], [4, 2, 1]>, + InstrItinData], [4, 2, 1]>, // // Register offset with update - InstrItinData], [4, 2, 1, 1]>, + InstrItinData], [4, 2, 1, 1]>, + InstrItinData], [4, 2, 1, 1]>, + InstrItinData], [4, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData], [5, 2, 2, 1]>, + InstrItinData], [5, 2, 2, 1]>, + InstrItinData], [5, 2, 2, 1]>, // // Load multiple @@ -123,23 +132,31 @@ def ARMV6Itineraries : ProcessorItineraries< // Integer store pipeline // // Immediate offset - InstrItinData], [2, 1]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1]>, // // Register offset - InstrItinData], [2, 1, 1]>, - + InstrItinData], [2, 1, 1]>, + InstrItinData], [2, 1, 1]>, + InstrItinData], [2, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData], [2, 2, 1]>, + InstrItinData], [2, 2, 1]>, + InstrItinData], [2, 2, 1]>, // // Immediate offset with update - InstrItinData], [2, 2, 1]>, + InstrItinData], [2, 2, 1]>, + InstrItinData], [2, 2, 1]>, // // Register offset with update - 
InstrItinData], [2, 2, 1, 1]>, + InstrItinData], [2, 2, 1, 1]>, + InstrItinData], [2, 2, 1, 1]>, + InstrItinData], [2, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData], [2, 2, 2, 1]>, + InstrItinData], [2, 2, 2, 1]>, + InstrItinData], [2, 2, 2, 1]>, // // Store multiple InstrItinData]>, -- 2.34.1