delete the fixme too! :)
[oota-llvm.git] / lib / Target / ARM / ARMScheduleV7.td
index 78537a515ff65e0b5d0f288f802e15472763877d..bf5858171de3044d1707f595e060ef40aa2961d7 100644 (file)
 //
 //===----------------------------------------------------------------------===//
 
+//
+// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
+//
 // Dual issue pipeline so every itinerary starts with FU_Pipe0 | FU_Pipe1
+//
 def CortexA8Itineraries : ProcessorItineraries<[
-  // two fully-pipelined integer ALU pipelines
-  InstrItinData<IIC_iALU    , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
-  // integer Multiply pipeline
-  InstrItinData<IIC_iMPYh   , [InstrStage<1, [FU_Pipe0]>]>,
-  InstrItinData<IIC_iMPYw   , [InstrStage<1, [FU_Pipe1], 0>, 
-                               InstrStage<2, [FU_Pipe0]>]>,
-  InstrItinData<IIC_iMPYl   , [InstrStage<2, [FU_Pipe1], 0>, 
-                               InstrStage<3, [FU_Pipe0]>]>,
+
+  // Two fully-pipelined integer ALU pipelines
+  //
+  // No operand cycles
+  InstrItinData<IIC_iALUx    , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
+  //
+  // Binary Instructions that produce a result
+  InstrItinData<IIC_iALUi    , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>,
+  InstrItinData<IIC_iALUr    , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 2]>,
+  InstrItinData<IIC_iALUsi   , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1]>,
+  InstrItinData<IIC_iALUsr   , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1, 1]>,
+  //
+  // Unary Instructions that produce a result
+  InstrItinData<IIC_iUNAr    , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>,
+  InstrItinData<IIC_iUNAsi   , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
+  InstrItinData<IIC_iUNAsr   , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>,
+  //
+  // Compare instructions
+  InstrItinData<IIC_iCMPi    , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>,
+  InstrItinData<IIC_iCMPr    , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>,
+  InstrItinData<IIC_iCMPsi   , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
+  InstrItinData<IIC_iCMPsr   , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>,
+  //
+  // Move instructions, unconditional
+  InstrItinData<IIC_iMOVi    , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1]>,
+  InstrItinData<IIC_iMOVr    , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>,
+  InstrItinData<IIC_iMOVsi   , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>,
+  InstrItinData<IIC_iMOVsr   , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1, 1]>,
+  //
+  // Move instructions, conditional
+  InstrItinData<IIC_iCMOVi   , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>,
+  InstrItinData<IIC_iCMOVr   , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
+  InstrItinData<IIC_iCMOVsi  , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>,
+  InstrItinData<IIC_iCMOVsr  , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>,
+
+  // Integer multiply pipeline
+  // Result written in E5, but that is relative to the last cycle of multicycle,
+  // so we use 6 for those cases
+  //
+  InstrItinData<IIC_iMUL16   , [InstrStage<1, [FU_Pipe0]>], [5, 1, 1]>,
+  InstrItinData<IIC_iMAC16   , [InstrStage<1, [FU_Pipe1], 0>, 
+                                InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>,
+  InstrItinData<IIC_iMUL32   , [InstrStage<1, [FU_Pipe1], 0>, 
+                                InstrStage<2, [FU_Pipe0]>], [6, 1, 1]>,
+  InstrItinData<IIC_iMAC32   , [InstrStage<1, [FU_Pipe1], 0>, 
+                                InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>,
+  InstrItinData<IIC_iMUL64   , [InstrStage<2, [FU_Pipe1], 0>, 
+                                InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>,
+  InstrItinData<IIC_iMAC64   , [InstrStage<2, [FU_Pipe1], 0>, 
+                                InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>,
+  
+  // Integer load pipeline
+  //
   // loads have an extra cycle of latency, but are fully pipelined
   // use FU_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData<IIC_iLoad   , [InstrStage<1, [FU_Issue], 0>, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
-                               InstrStage<1, [FU_LdSt0]>]>,
-  // fully-pipelined stores
+  //
+  // Immediate offset
+  InstrItinData<IIC_iLoadi   , [InstrStage<1, [FU_Issue], 0>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [3, 1]>,
+  //
+  // Register offset
+  InstrItinData<IIC_iLoadr   , [InstrStage<1, [FU_Issue], 0>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
+  //
+  // Scaled register offset, issues over 2 cycles
+  InstrItinData<IIC_iLoadsi  , [InstrStage<2, [FU_Issue], 0>,
+                                InstrStage<1, [FU_Pipe0], 0>,
+                                InstrStage<1, [FU_Pipe1], 0>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [4, 1, 1]>,
+  //
+  // Immediate offset with update
+  InstrItinData<IIC_iLoadiu  , [InstrStage<1, [FU_Issue], 0>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [3, 2, 1]>,
+  //
+  // Register offset with update
+  InstrItinData<IIC_iLoadru  , [InstrStage<1, [FU_Issue], 0>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [3, 2, 1, 1]>,
+  //
+  // Scaled register offset with update, issues over 2 cycles
+  InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Issue], 0>,
+                                InstrStage<1, [FU_Pipe0], 0>,
+                                InstrStage<1, [FU_Pipe1], 0>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [4, 3, 1, 1]>,
+  //
+  // Load multiple
+  InstrItinData<IIC_iLoadm   , [InstrStage<2, [FU_Issue], 0>,
+                                InstrStage<2, [FU_Pipe0], 0>,
+                                InstrStage<2, [FU_Pipe1], 0>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>]>,
+
+  // Integer store pipeline
+  //
   // use FU_Issue to enforce the 1 load/store per cycle limit
-  InstrItinData<IIC_iStore  , [InstrStage<1, [FU_Issue], 0>, 
-                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
+  //
+  // Immediate offset
+  InstrItinData<IIC_iStorei  , [InstrStage<1, [FU_Issue], 0>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [3, 1]>,
+  //
+  // Register offset
+  InstrItinData<IIC_iStorer  , [InstrStage<1, [FU_Issue], 0>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [3, 1, 1]>,
+  //
+  // Scaled register offset, issues over 2 cycles
+  InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Issue], 0>,
+                                InstrStage<1, [FU_Pipe0], 0>,
+                                InstrStage<1, [FU_Pipe1], 0>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>,
+  //
+  // Immediate offset with update
+  InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Issue], 0>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [2, 3, 1]>,
+  //
+  // Register offset with update
+  InstrItinData<IIC_iStoreru  , [InstrStage<1, [FU_Issue], 0>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [2, 3, 1, 1]>,
+  //
+  // Scaled register offset with update, issues over 2 cycles
+  InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Issue], 0>,
+                                InstrStage<1, [FU_Pipe0], 0>,
+                                InstrStage<1, [FU_Pipe1], 0>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>], [3, 3, 1, 1]>,
+  //
+  // Store multiple
+  InstrItinData<IIC_iStorem  , [InstrStage<2, [FU_Issue], 0>,
+                                InstrStage<2, [FU_Pipe0], 0>,
+                                InstrStage<2, [FU_Pipe1], 0>,
+                                InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+                                InstrStage<1, [FU_LdSt0]>]>,
+  
+  // Branch
+  //
   // no delay slots, so the latency of a branch is unimportant
   InstrItinData<IIC_Br      , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
 
@@ -51,15 +180,57 @@ def CortexA8Itineraries : ProcessorItineraries<[
 
 // FIXME
 def CortexA9Itineraries : ProcessorItineraries<[
-  InstrItinData<IIC_iALU    , [InstrStage<1, [FU_Pipe0]>]>,
-  InstrItinData<IIC_iMPYh   , [InstrStage<1, [FU_Pipe0]>]>,
-  InstrItinData<IIC_iMPYw   , [InstrStage<1, [FU_Pipe0]>]>,
-  InstrItinData<IIC_iMPYl   , [InstrStage<1, [FU_Pipe0]>]>,
-  InstrItinData<IIC_iLoad   , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
-  InstrItinData<IIC_iStore  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iALUx   , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iALUi   , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iALUr   , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iALUsi  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iALUsr  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iUNAr   , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iUNAsi  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iUNAsr  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iCMPi   , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iCMPr   , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iCMPsi  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iCMPsr  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iMOVi   , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iMOVr   , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iMOVsi  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iMOVsr  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iCMOVi  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iCMOVr  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iMUL16  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iMAC16  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iMUL32  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iMAC32  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iMUL64  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iMAC64  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iLoadi  , [InstrStage<1, [FU_Pipe0]>,
+                               InstrStage<1, [FU_LdSt0]>]>,
+  InstrItinData<IIC_iLoadr  , [InstrStage<1, [FU_Pipe0]>,
+                               InstrStage<1, [FU_LdSt0]>]>,
+  InstrItinData<IIC_iLoadsi , [InstrStage<1, [FU_Pipe0]>,
+                               InstrStage<1, [FU_LdSt0]>]>,
+  InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Pipe0]>,
+                               InstrStage<1, [FU_LdSt0]>]>,
+  InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Pipe0]>,
+                               InstrStage<1, [FU_LdSt0]>]>,
+  InstrItinData<IIC_iLoadsiu, [InstrStage<1, [FU_Pipe0]>,
+                               InstrStage<1, [FU_LdSt0]>]>,
+  InstrItinData<IIC_iLoadm  , [InstrStage<2, [FU_Pipe0]>,
+                               InstrStage<2, [FU_LdSt0]>]>,
+  InstrItinData<IIC_iStorei  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iStorer  , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iStoresi , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iStoresiu, [InstrStage<1, [FU_Pipe0]>]>,
+  InstrItinData<IIC_iStorem  , [InstrStage<2, [FU_Pipe0]>]>,
   InstrItinData<IIC_Br      , [InstrStage<1, [FU_Pipe0]>]>,
   InstrItinData<IIC_fpALU   , [InstrStage<1, [FU_Pipe0]>]>,
   InstrItinData<IIC_fpMPY   , [InstrStage<1, [FU_Pipe0]>]>,
-  InstrItinData<IIC_fpLoad  , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
+  InstrItinData<IIC_fpLoad  , [InstrStage<1, [FU_Pipe0]>,
+                               InstrStage<1, [FU_LdSt0]>]>,
   InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>
 ]>;