// The following definitions describe the simpler per-operand machine model.
// This works with MachineScheduler and will eventually replace itineraries.
+class A9WriteLMOpsListType<list<WriteSequence> writes> { // Wrapper record that carries a list of WriteSequence values.
+ list <WriteSequence> Writes = writes; // Template argument exposed as a field so users can slice it by index (e.g. Writes[0-1]).
+ SchedMachineModel SchedModel = ?; // Left uninitialized ('?') by this class.
+}
// Cortex-A9 machine model for scheduling and other instruction cost heuristics.
def CortexA9Model : SchedMachineModel {
let MispredictPenalty = 8; // Based on estimate of pipeline depth.
let Itineraries = CortexA9Itineraries; // Itineraries are still attached to this model alongside the per-operand definitions.
+
+ // FIXME: Many vector operations were never given an itinerary. We
+ // haven't mapped these to the new model either.
+ let CompleteModel = 0; // Cleared because of the unmapped vector operations described in the FIXME.
}
//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available.
+//
+// The AGU unit has BufferSize=1 so that the latency between operations
+// that use it is considered to stall other operations.
+//
+// The FP unit has BufferSize=0 so that it is a hard dispatch
+// hazard. No instruction may be dispatched while the unit is reserved.
let SchedModel = CortexA9Model in {
def A9UnitALU : ProcResource<2>; // Two units available.
def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } // One unit; Super links it to A9UnitALU (presumably a subset of the ALU units; confirm).
-def A9UnitAGU : ProcResource<1>;
+def A9UnitAGU : ProcResource<1> { let BufferSize = 1; } // BufferSize=1: latency between AGU users is treated as stalling other operations.
def A9UnitLS : ProcResource<1>; // One unit available.
def A9UnitFP : ProcResource<1> { let BufferSize = 0; } // BufferSize=0: hard dispatch hazard while the unit is reserved.
def A9UnitB : ProcResource<1>; // One unit available.
// Define a predicate to select the LDM based on number of memory addresses.
// The value returned by getNumLDMAddresses is compared as (count+1)/2, so an
// odd count selects the same NumAddr as the next even count (assuming the
// predicate string is evaluated with integer division; confirm).
def A9LMAdr#NumAddr#Pred :
- SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
+ SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>;
} // foreach NumAddr
//===----------------------------------------------------------------------===//
// LDM: Load multiple into 32-bit integer registers.
+def A9WriteLMOpsList : A9WriteLMOpsListType< // Full list of LDM writes; the A9WriteLM variants select index prefixes of it.
+ [A9WriteL1, A9WriteL1Hi, // 0-1
+ A9WriteL2, A9WriteL2Hi, // 2-3
+ A9WriteL3, A9WriteL3Hi, // 4-5
+ A9WriteL4, A9WriteL4Hi, // 6-7
+ A9WriteL5, A9WriteL5Hi, // 8-9
+ A9WriteL6, A9WriteL6Hi, // 10-11
+ A9WriteL7, A9WriteL7Hi, // 12-13
+ A9WriteL8, A9WriteL8Hi]>; // 14-15
+
// A9WriteLM variants expand into a pair of writes for each 64-bit
// value loaded. When the number of registers is odd, the last
// A9WriteLnHi is naturally ignored because the instruction has no
// following def operands. These variants take no issue resource, so
// they may need to be part of a WriteSequence that includes A9WriteIssue.
def A9WriteLM : SchedWriteVariant<[
- SchedVar<A9LMAdr1Pred, [A9WriteL1, A9WriteL1Hi]>,
- SchedVar<A9LMAdr2Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi]>,
- SchedVar<A9LMAdr3Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi]>,
- SchedVar<A9LMAdr4Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi]>,
- SchedVar<A9LMAdr5Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi]>,
- SchedVar<A9LMAdr6Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi,
- A9WriteL6, A9WriteL6Hi]>,
- SchedVar<A9LMAdr7Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi,
- A9WriteL6, A9WriteL6Hi,
- A9WriteL7, A9WriteL7Hi]>,
- SchedVar<A9LMAdr8Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi,
- A9WriteL6, A9WriteL6Hi,
- A9WriteL7, A9WriteL7Hi,
- A9WriteL8, A9WriteL8Hi]>,
+ SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
+ SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
+ SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
+ SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
+ SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
+ SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
+ SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
+ SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
// For unknown LDMs, define the maximum number of writes, but only
// make the first two consume resources.
SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi,
// pair of writes for each 64-bit data loaded. When the number of
// registers is odd, the last WriteLMfpnHi is naturally ignored because
// the instruction has no following def operands.
+
+def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType< // Layout: non-Hi writes at 0-7, Hi writes at 8-22 (fp1Hi once, fp2Hi-fp8Hi twice each); A9WriteLMfpPostRA slices it by index.
+ [A9WriteLMfp1, A9WriteLMfp2, // 0-1
+ A9WriteLMfp3, A9WriteLMfp4, // 2-3
+ A9WriteLMfp5, A9WriteLMfp6, // 4-5
+ A9WriteLMfp7, A9WriteLMfp8, // 6-7
+ A9WriteLMfp1Hi, // 8-8
+ A9WriteLMfp2Hi, A9WriteLMfp2Hi, // 9-10
+ A9WriteLMfp3Hi, A9WriteLMfp3Hi, // 11-12
+ A9WriteLMfp4Hi, A9WriteLMfp4Hi, // 13-14
+ A9WriteLMfp5Hi, A9WriteLMfp5Hi, // 15-16
+ A9WriteLMfp6Hi, A9WriteLMfp6Hi, // 17-18
+ A9WriteLMfp7Hi, A9WriteLMfp7Hi, // 19-20
+ A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22
+
def A9WriteLMfpPostRA : SchedWriteVariant<[
- SchedVar<A9LMAdr1Pred, [A9WriteLMfp1, A9WriteLMfp1Hi]>,
- SchedVar<A9LMAdr2Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi]>,
- SchedVar<A9LMAdr3Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi]>,
- SchedVar<A9LMAdr4Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi]>,
- SchedVar<A9LMAdr5Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi]>,
- SchedVar<A9LMAdr6Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi,
- A9WriteLMfp6, A9WriteLMfp6Hi]>,
- SchedVar<A9LMAdr7Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi,
- A9WriteLMfp6, A9WriteLMfp6Hi,
- A9WriteLMfp7, A9WriteLMfp7Hi]>,
- SchedVar<A9LMAdr8Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi,
- A9WriteLMfp6, A9WriteLMfp6Hi,
- A9WriteLMfp7, A9WriteLMfp7Hi,
- A9WriteLMfp8, A9WriteLMfp8Hi]>,
+ SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
+ SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
+ SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
+ SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
+ SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
+ SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
+ SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
+ SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
// For unknown LDMs, define the maximum number of writes, but only
- // make the first two consume resources.
- SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3Hi, A9WriteLMfp3Hi,
- A9WriteLMfp4Hi, A9WriteLMfp4Hi,
+ // make the first two consume resources. We are optimizing for the case
+ // where the operands are DPRs, and this determines the first eight
+ // types. The remaining eight types are filled to cover the case
+ // where the operands are SPRs.
+ SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp2,
+ A9WriteLMfp3Hi, A9WriteLMfp4Hi,
+ A9WriteLMfp5Hi, A9WriteLMfp6Hi,
+ A9WriteLMfp7Hi, A9WriteLMfp8Hi,
A9WriteLMfp5Hi, A9WriteLMfp5Hi,
A9WriteLMfp6Hi, A9WriteLMfp6Hi,
A9WriteLMfp7Hi, A9WriteLMfp7Hi,
SchedVar<A9PostRA, [A9WriteLMfpPostRA]>]>;
//===----------------------------------------------------------------------===//
-// Resources for other (non LDM/VLDM) Variants.
+// Resources for other (non-LDM/VLDM) Variants.
// These mov immediate writers are unconditionally expanded with
// additive latency.
// ...
// VHADD/VRHADD/VQADD/VTST/VADH/VRADH
def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>;
+
// VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>;
// VQNEG/VQABS
def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;
// NEON permute
// NOTE(review): the updated entry lists A9WriteV2 twice, presumably one
// write per result operand of the permute; confirm against the instructions.
-def :ItinRW<[A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
+def :ItinRW<[A9WriteV2, A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
// Entries for the IIC_VPERMQ3 and IIC_VEXTQ itinerary classes.
def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
[IIC_VPERMQ3, IIC_VEXTQ]>;
// Entry for the IIC_VTB1 itinerary class.
def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>;