//===----------------------------------------------------------------------===//
//
// This file describes the structures used for instruction itineraries and
-// states. This is used by schedulers to determine instruction states and
+// stages. This is used by schedulers to determine instruction stages and
// latencies.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_TARGETINSTRITINERARIES_H
#define LLVM_TARGET_TARGETINSTRITINERARIES_H
+#include <algorithm>
+
namespace llvm {
//===----------------------------------------------------------------------===//
-/// Instruction stage - These values represent a step in the execution of an
-/// instruction. The latency represents the number of discrete time slots
-/// needed to complete the stage. Units represent the choice of functional
-/// units that can be used to complete the stage. Eg. IntUnit1, IntUnit2.
+/// Instruction stage - These values represent a non-pipelined step in
+/// the execution of an instruction. Cycles represents the number of
+/// discrete time slots needed to complete the stage. Units represent
+/// the choice of functional units that can be used to complete the
+/// stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many
+/// cycles should elapse from the start of this stage to the start of
+/// the next stage in the itinerary. A value of -1 indicates that the
+/// next stage should start immediately after the current one.
+/// For example:
+///
+/// { 1, x, -1 }
+/// indicates that the stage occupies FU x for 1 cycle and that
+/// the next stage starts immediately after this one.
+///
+/// { 2, x|y, 1 }
+/// indicates that the stage occupies either FU x or FU y for 2
+/// consecuative cycles and that the next stage starts one cycle
+/// after this stage starts. That is, the stage requirements
+/// overlap in time.
+///
+/// { 1, x, 0 }
+/// indicates that the stage occupies FU x for 1 cycle and that
+/// the next stage starts in this same cycle. This can be used to
+/// indicate that the instruction requires multiple stages at the
+/// same time.
///
struct InstrStage {
- unsigned Cycles; ///< Length of stage in machine cycles
- unsigned Units; ///< Choice of functional units
+ unsigned Cycles_; ///< Length of stage in machine cycles
+ unsigned Units_; ///< Choice of functional units
+ int NextCycles_; ///< Number of machine cycles to next stage
+
+ /// getCycles - returns the number of cycles the stage is occupied
+ unsigned getCycles() const {
+ return Cycles_;
+ }
+
+ /// getUnits - returns the choice of FUs
+ unsigned getUnits() const {
+ return Units_;
+ }
+
+ /// getNextCycles - returns the number of cycles from the start of
+ /// this stage to the start of the next stage in the itinerary
+ unsigned getNextCycles() const {
+ return (NextCycles_ >= 0) ? (unsigned)NextCycles_ : Cycles_;
+ }
};
if (isEmpty())
return 1;
- // Just sum the cycle count for each stage. The assumption is that all
- // inputs are consumed at the start of the first stage and that all
- // outputs are produced at the end of the last stage.
- unsigned Latency = 0;
+ // Caclulate the maximum completion time for any stage. The
+ // assumption is that all inputs are consumed at the start of the
+ // first stage and that all outputs are produced at the end of the
+ // latest completing last stage.
+ unsigned Latency = 0, StartCycle = 0;
for (const InstrStage *IS = begin(ItinClassIndx), *E = end(ItinClassIndx);
- IS != E; ++IS)
- Latency += IS->Cycles;
+ IS != E; ++IS) {
+ Latency = std::max(Latency, StartCycle + IS->getCycles());
+ StartCycle += IS->getNextCycles();
+ }
+
return Latency;
}
};
class FuncUnit;
//===----------------------------------------------------------------------===//
-// Instruction stage - These values represent a step in the execution of an
-// instruction. The latency represents the number of discrete time slots used
-// need to complete the stage. Units represent the choice of functional units
-// that can be used to complete the stage. Eg. IntUnit1, IntUnit2.
+// Instruction stage - These values represent a non-pipelined step in
+// the execution of an instruction. Cycles represents the number of
+// discrete time slots needed to complete the stage. Units represent
+// the choice of functional units that can be used to complete the
+// stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many
+// cycles should elapse from the start of this stage to the start of
+// the next stage in the itinerary. For example:
//
-class InstrStage<int cycles, list<FuncUnit> units> {
+// A stage is specified in one of two ways:
+//
+// InstrStage<1, [FU_x, FU_y]> - TimeInc defaults to Cycles
+// InstrStage<1, [FU_x, FU_y], 0> - TimeInc explicit
+//
+class InstrStage<int cycles, list<FuncUnit> units, int timeinc = -1> {
int Cycles = cycles; // length of stage in machine cycles
list<FuncUnit> Units = units; // choice of functional units
+ int TimeInc = timeinc; // cycles till start of next stage
}
//===----------------------------------------------------------------------===//
// If the begin stage of an itinerary has 0 cycles and units,
// then we have reached the end of the itineraries.
const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
- if ((IS->Cycles == 0) && (IS->Units == 0))
+ if ((IS->getCycles() == 0) && (IS->getUnits() == 0))
break;
unsigned ItinDepth = 0;
for (; IS != E; ++IS)
- ItinDepth += std::max(1U, IS->Cycles);
+ ItinDepth += IS->getCycles();
ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth);
}
unsigned idx = SU->getInstr()->getDesc().getSchedClass();
for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
IS != E; ++IS) {
- // If the stages cycles are 0, then we must have the FU free in
- // the current cycle, but we don't advance the cycle time .
- unsigned StageCycles = std::max(1U, IS->Cycles);
-
// We must find one of the stage's units free for every cycle the
- // stage is occupied.
- for (unsigned int i = 0; i < StageCycles; ++i) {
- assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
-
- unsigned index = getFutureIndex(cycle);
- unsigned freeUnits = IS->Units & ~Scoreboard[index];
+ // stage is occupied. FIXME it would be more accurate to find the
+ // same unit free in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ assert(((cycle + i) < ScoreboardDepth) &&
+ "Scoreboard depth exceeded!");
+
+ unsigned index = getFutureIndex(cycle + i);
+ unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
if (!freeUnits) {
- DEBUG(errs() << "*** Hazard in cycle " << cycle << ", ");
+ DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", ");
DEBUG(errs() << "SU(" << SU->NodeNum << "): ");
DEBUG(SU->getInstr()->dump());
return Hazard;
}
-
- if (IS->Cycles > 0)
- ++cycle;
}
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
}
return NoHazard;
unsigned idx = SU->getInstr()->getDesc().getSchedClass();
for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
IS != E; ++IS) {
- // If the stages cycles are 0, then we must reserve the FU in the
- // current cycle, but we don't advance the cycle time .
- unsigned StageCycles = std::max(1U, IS->Cycles);
-
// We must reserve one of the stage's units for every cycle the
- // stage is occupied.
- for (unsigned int i = 0; i < StageCycles; ++i) {
- assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
-
- unsigned index = getFutureIndex(cycle);
- unsigned freeUnits = IS->Units & ~Scoreboard[index];
+ // stage is occupied. FIXME it would be more accurate to reserve
+ // the same unit free in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ assert(((cycle + i) < ScoreboardDepth) &&
+ "Scoreboard depth exceeded!");
+
+ unsigned index = getFutureIndex(cycle + i);
+ unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
// reduce to a single unit
unsigned freeUnit = 0;
assert(freeUnit && "No function unit available!");
Scoreboard[index] |= freeUnit;
-
- if (IS->Cycles > 0)
- ++cycle;
}
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
}
DEBUG(dumpScoreboard());
[]>;
// On non-Darwin platforms R9 is callee-saved.
-let isCall = 1, Itinerary = IIC_Br,
+let isCall = 1,
Defs = [R0, R1, R2, R3, R12, LR,
D0, D1, D2, D3, D4, D5, D6, D7,
D16, D17, D18, D19, D20, D21, D22, D23,
}
// On Darwin R9 is call-clobbered.
-let isCall = 1, Itinerary = IIC_Br,
+let isCall = 1,
Defs = [R0, R1, R2, R3, R9, R12, LR,
D0, D1, D2, D3, D4, D5, D6, D7,
D16, D17, D18, D19, D20, D21, D22, D23,
}
}
-let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in {
+let isBranch = 1, isTerminator = 1 in {
// B is "predicable" since it can be xformed into a Bcc.
let isBarrier = 1 in {
let isPredicable = 1 in
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
- AddrMode1, Size4Bytes, IndexModeNone, DPFrm, NoItinerary,
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALU,
"bfc", " $dst, $imm", "$src = $dst",
[(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
Requires<[IsARM, HasV6T2]> {
//
let isCommutable = 1 in
-def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"mul", " $dst, $a, $b",
[(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iALU, "mla", " $dst, $a, $b, $c",
+ IIC_iMPY, "mla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iALU, "mls", " $dst, $a, $b, $c",
+ IIC_iMPY, "mls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>,
Requires<[IsARM, HasV6T2]>;
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iALU,
+ (ins GPR:$a, GPR:$b), IIC_iMPY,
"smull", " $ldst, $hdst, $a, $b", []>;
def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iALU,
+ (ins GPR:$a, GPR:$b), IIC_iMPY,
"umull", " $ldst, $hdst, $a, $b", []>;
}
// Multiply + accumulate
def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iALU,
+ (ins GPR:$a, GPR:$b), IIC_iMPY,
"smlal", " $ldst, $hdst, $a, $b", []>;
def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iALU,
+ (ins GPR:$a, GPR:$b), IIC_iMPY,
"umlal", " $ldst, $hdst, $a, $b", []>;
def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iALU,
+ (ins GPR:$a, GPR:$b), IIC_iMPY,
"umaal", " $ldst, $hdst, $a, $b", []>,
Requires<[IsARM, HasV6]>;
} // neverHasSideEffects
// Most significant word multiply
def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iALU, "smmul", " $dst, $a, $b",
+ IIC_iMPY, "smmul", " $dst, $a, $b",
[(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>,
Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b0001;
}
def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iALU, "smmla", " $dst, $a, $b, $c",
+ IIC_iMPY, "smmla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b0001;
def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iALU, "smmls", " $dst, $a, $b, $c",
+ IIC_iMPY, "smmls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b1101;
multiclass AI_smul<string opc, PatFrag opnode> {
def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b",
+ IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16)))]>,
Requires<[IsARM, HasV5TE]> {
}
def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b",
+ IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
}
def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b",
+ IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16)))]>,
Requires<[IsARM, HasV5TE]> {
}
def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b",
+ IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
}
def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b",
+ IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16)))]>,
Requires<[IsARM, HasV5TE]> {
}
def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b",
+ IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16)))]>,
Requires<[IsARM, HasV5TE]> {
multiclass AI_smla<string opc, PatFrag opnode> {
def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
+ IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc,
(opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16))))]>,
}
def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
+ IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]> {
}
def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
+ IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16))))]>,
Requires<[IsARM, HasV5TE]> {
}
def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
+ IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]> {
}
def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
+ IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
}
def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
+ IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
// multiply register
let isCommutable = 1 in
-def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALU,
+def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMPY,
"mul", " $dst, $rhs",
[(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>;
// Multiply Instructions.
//
let isCommutable = 1 in
-def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"mul", " $dst, $a, $b",
[(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
-def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
+def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"mla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
-def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
+def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"mls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>;
// Extra precision multiplies with low / high results
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
-def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"smull", " $ldst, $hdst, $a, $b", []>;
-def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"umull", " $ldst, $hdst, $a, $b", []>;
}
// Multiply + accumulate
-def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"smlal", " $ldst, $hdst, $a, $b", []>;
-def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"umlal", " $ldst, $hdst, $a, $b", []>;
-def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"umaal", " $ldst, $hdst, $a, $b", []>;
} // neverHasSideEffects
// Most significant word multiply
-def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"smmul", " $dst, $a, $b",
[(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>;
-def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
+def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"smmla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>;
-def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
+def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"smmls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>;
multiclass T2I_smul<string opc, PatFrag opnode> {
- def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+ def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "bb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16)))]>;
- def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+ def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "bt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16))))]>;
- def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+ def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "tb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16)))]>;
- def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+ def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "tt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16))))]>;
- def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+ def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "wb"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16)))]>;
- def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
+ def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "wt"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16)))]>;
multiclass T2I_smla<string opc, PatFrag opnode> {
- def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
+ def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "bb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc,
(opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16))))]>;
- def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
+ def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "bt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16)))))]>;
- def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
+ def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "tb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16))))]>;
- def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
+ def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "tt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16)))))]>;
- def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
+ def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "wb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16))))]>;
- def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
+ def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "wt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16))))]>;
// two fully-pipelined integer ALU pipelines
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
// one fully-pipelined integer Multiply pipeline
- // function units are used in alpha order, so use FU_Pipe1
- // for the Multiple pipeline
- InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe1]>]>,
+ // function units are reserved by the scheduler in reverse alpha order,
+ // so use FU_Pipe0 for the Multiple pipeline
+ InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>,
// loads have an extra cycle of latency, but are fully pipelined
- // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_iLoad , [InstrStage<0, [FU_Issue]>,
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>]>,
// fully-pipelined stores
- // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_iStore , [InstrStage<0, [FU_Issue]>,
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_iStore , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
// no delay slots, so the latency of a branch is unimportant
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
- // VFP ALU is not pipelined so stall all issues
- // FIXME assume NFP pipeline and 7 cycle non-pipelined latency
- InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>,
+ // NFP ALU is not pipelined so stall all issues
+ InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0], 0>,
+ InstrStage<7, [FU_Pipe1], 0>]>,
// VFP MPY is not pipelined so stall all issues
- // FIXME assume NFP pipeline and 7 cycle non-pipelined latency
- InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>,
+ InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0], 0>,
+ InstrStage<7, [FU_Pipe1], 0>]>,
// loads have an extra cycle of latency, but are fully pipelined
- // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoad , [InstrStage<0, [FU_Issue]>,
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>]>,
- // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStore , [InstrStage<0, [FU_Issue]>,
+ // use FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>
]>;
// Next stage
const Record *Stage = StageList[i];
- // Form string as ,{ cycles, u1 | u2 | ... | un }
+ // Form string as ,{ cycles, u1 | u2 | ... | un, timeinc }
int Cycles = Stage->getValueAsInt("Cycles");
ItinString += " { " + itostr(Cycles) + ", ";
if (++j < M) ItinString += " | ";
}
+ int TimeInc = Stage->getValueAsInt("TimeInc");
+ ItinString += ", " + itostr(TimeInc);
+
// Close off stage
ItinString += " }";
if (++i < N) ItinString += ", ";
// Begin stages table
OS << "static const llvm::InstrStage Stages[] = {\n"
- " { 0, 0 }, // No itinerary\n";
+ " { 0, 0, 0 }, // No itinerary\n";
unsigned StageCount = 1;
unsigned ItinEnum = 1;
// If new itinerary
if (Find == 0) {
- // Emit as { cycles, u1 | u2 | ... | un }, // index
+ // Emit as { cycles, u1 | u2 | ... | un, timeinc }, // index
OS << ItinString << ", // " << ItinEnum << "\n";
// Record Itin class number.
ItinMap[ItinString] = Find = StageCount;
}
// Closing stage
- OS << " { 0, 0 } // End itinerary\n";
+ OS << " { 0, 0, 0 } // End itinerary\n";
// End stages table
OS << "};\n";