Use the "isCompare" machine instruction attribute instead of calling the
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
index 1e9a23494f15b92c3c23524d93dbd6b6da1f0927..df2f896a8d4b1cff09db8931022be366d87b25f0 100644 (file)
@@ -1,10 +1,10 @@
 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
-// 
+//
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-// 
+//
 //===----------------------------------------------------------------------===//
 //
 // This file defines the itinerary class data for the ARM Cortex A9 processors.
@@ -16,7 +16,6 @@
 // Reference Manual".
 //
 // Functional units
-def A9_Issue   : FuncUnit; // issue
 def A9_Pipe0   : FuncUnit; // pipeline 0
 def A9_Pipe1   : FuncUnit; // pipeline 1
 def A9_LSPipe  : FuncUnit; // LS pipe
@@ -27,41 +26,41 @@ def A9_DRegsN  : FuncUnit; // FP register set, NEON side
 // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
 //
 def CortexA9Itineraries : ProcessorItineraries<
-  [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [
+  [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [
   // Two fully-pipelined integer ALU pipelines
   // FIXME: There are no operand latencies for these instructions at all!
   //
   // Move instructions, unconditional
-  InstrItinData<IIC_iMOVi    , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
-  InstrItinData<IIC_iMOVr    , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
-  InstrItinData<IIC_iMOVsi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
-  InstrItinData<IIC_iMOVsr   , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
+  InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
+  InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
+  InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
+  InstrItinData<IIC_iMOVsr  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
   //
   // No operand cycles
   InstrItinData<IIC_iALUx    , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
   //
   // Binary Instructions that produce a result
-  InstrItinData<IIC_iALUi    , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
-  InstrItinData<IIC_iALUr    , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
-  InstrItinData<IIC_iALUsi   , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
-  InstrItinData<IIC_iALUsr   , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
+  InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
+  InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
+  InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
+  InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
   //
   // Unary Instructions that produce a result
-  InstrItinData<IIC_iUNAr    , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
-  InstrItinData<IIC_iUNAsi   , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
-  InstrItinData<IIC_iUNAsr   , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+  InstrItinData<IIC_iUNAr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
+  InstrItinData<IIC_iUNAsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+  InstrItinData<IIC_iUNAsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
   //
   // Compare instructions
-  InstrItinData<IIC_iCMPi    , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
-  InstrItinData<IIC_iCMPr    , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
-  InstrItinData<IIC_iCMPsi   , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
-  InstrItinData<IIC_iCMPsr   , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+  InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
+  InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
+  InstrItinData<IIC_iCMPsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+  InstrItinData<IIC_iCMPsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
   //
   // Move instructions, conditional
-  InstrItinData<IIC_iCMOVi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
-  InstrItinData<IIC_iCMOVr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
-  InstrItinData<IIC_iCMOVsi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
-  InstrItinData<IIC_iCMOVsr  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+  InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
+  InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+  InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+  InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
 
   // Integer multiply pipeline
   //
@@ -77,7 +76,66 @@ def CortexA9Itineraries : ProcessorItineraries<
                                 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
   InstrItinData<IIC_iMAC64   , [InstrStage<2, [A9_Pipe1], 0>,
                                 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
+  // Integer load pipeline
+  // FIXME: The timings are rough approximations.
+  //
+  // Immediate offset
+  InstrItinData<IIC_iLoadi   , [InstrStage<1, [A9_Pipe1]>,
+                                InstrStage<1, [A9_LSPipe]>], [3, 1]>,
+  //
+  // Register offset
+  InstrItinData<IIC_iLoadr   , [InstrStage<1, [A9_Pipe1]>,
+                                InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
+  //
+  // Scaled register offset
+  InstrItinData<IIC_iLoadsi  , [InstrStage<1, [A9_Pipe1]>,
+                                InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
+  //
+  // Immediate offset with update
+  InstrItinData<IIC_iLoadiu  , [InstrStage<1, [A9_Pipe1]>,
+                                InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
+  //
+  // Register offset with update
+  InstrItinData<IIC_iLoadru  , [InstrStage<1, [A9_Pipe1]>,
+                                InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
+  //
+  // Scaled register offset with update
+  InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
+                                InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
+  //
+  // Load multiple
+  InstrItinData<IIC_iLoadm   , [InstrStage<1, [A9_Pipe1]>,
+                                InstrStage<1, [A9_LSPipe]>]>,
 
+  // Integer store pipeline
+  //
+  // Immediate offset
+  InstrItinData<IIC_iStorei  , [InstrStage<1, [A9_Pipe1]>,
+                                InstrStage<1, [A9_LSPipe]>], [3, 1]>,
+  //
+  // Register offset
+  InstrItinData<IIC_iStorer  , [InstrStage<1, [ A9_Pipe1]>,
+                                InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
+  //
+  // Scaled register offset
+  InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
+                                InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
+  //
+  // Immediate offset with update
+  InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
+                                InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
+  //
+  // Register offset with update
+  InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
+                                InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
+  //
+  // Scaled register offset with update
+  InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
+                                InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
+  //
+  // Store multiple
+  InstrItinData<IIC_iStorem  , [InstrStage<1, [A9_Pipe1]>,
+                                InstrStage<1, [A9_LSPipe]>]>,
   // Branch
   //
   // no delay slots, so the latency of a branch is unimportant
@@ -94,8 +152,8 @@ def CortexA9Itineraries : ProcessorItineraries<
   //    register file writeback!).
   // Every NEON instruction does the same but with FUs swapped.
   //
-  // Since the reserved FU cannot be acquired this models precisly "cross-domain"
-  // stalls.
+  // Since the reserved FU cannot be acquired, this models precisely
+  // "cross-domain" stalls.
 
   // VFP
   // Issue through integer pipeline, and execute in NEON unit.
@@ -273,69 +331,56 @@ def CortexA9Itineraries : ProcessorItineraries<
                                InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
   //
   // Single-precision FP Load
-  // use A9_Issue to enforce the 1 load/store per cycle limit
   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Issue], 0>, 
-                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
-                               InstrStage<1, [A9_LSPipe], 0>,
+                               InstrStage<1, [A9_Pipe1], 0>,
+                               InstrStage<1, [A9_LSPipe]>,
                                InstrStage<1, [A9_NPipe]>]>,
   //
   // Double-precision FP Load
-  // use A9_Issue to enforce the 1 load/store per cycle limit
   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Issue], 0>, 
-                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
-                               InstrStage<1, [A9_LSPipe], 0>,
+                               InstrStage<1, [A9_Pipe1], 0>,
+                               InstrStage<1, [A9_LSPipe]>,
                                InstrStage<1, [A9_NPipe]>]>,
   //
   // FP Load Multiple
-  // use A9_Issue to enforce the 1 load/store per cycle limit
   InstrItinData<IIC_fpLoadm,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Issue], 0>, 
-                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
-                               InstrStage<1, [A9_LSPipe], 0>,
+                               InstrStage<1, [A9_Pipe1], 0>,
+                               InstrStage<1, [A9_LSPipe]>,
                                InstrStage<1, [A9_NPipe]>]>,
   //
   // Single-precision FP Store
-  // use A9_Issue to enforce the 1 load/store per cycle limit
   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Issue], 0>, 
-                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
-                               InstrStage<1, [A9_LSPipe], 0>,
+                               InstrStage<1, [A9_Pipe1], 0>,
+                               InstrStage<1, [A9_LSPipe]>,
                                InstrStage<1, [A9_NPipe]>]>,
   //
   // Double-precision FP Store
-  // use A9_Issue to enforce the 1 load/store per cycle limit
   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Issue], 0>, 
-                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
-                               InstrStage<1, [A9_LSPipe], 0>,
+                               InstrStage<1, [A9_Pipe1], 0>,
+                               InstrStage<1, [A9_LSPipe]>,
                                InstrStage<1, [A9_NPipe]>]>,
   //
   // FP Store Multiple
-  // use A9_Issue to enforce the 1 load/store per cycle limit
   InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
-                               InstrStage<1, [A9_Issue], 0>, 
-                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
-                               InstrStage<1, [A9_LSPipe], 0>,
+                               InstrStage<1, [A9_Pipe1], 0>,
+                               InstrStage<1, [A9_LSPipe]>,
                                InstrStage<1, [A9_NPipe]>]>,
   // NEON
   // Issue through integer pipeline, and execute in NEON unit.
-  // FIXME: Neon pipeline and LdSt unit are multiplexed. 
+  // FIXME: Neon pipeline and LdSt unit are multiplexed.
   //        Add some syntactic sugar to model this!
   // VLD1
   // FIXME: We don't model this instruction properly
   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Issue], 0>, 
-                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
-                               InstrStage<1, [A9_LSPipe], 0>,
+                               InstrStage<1, [A9_Pipe1], 0>,
+                               InstrStage<1, [A9_LSPipe]>,
                                InstrStage<1, [A9_NPipe]>]>,
   //
   // VLD2
@@ -343,9 +388,8 @@ def CortexA9Itineraries : ProcessorItineraries<
   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Issue], 0>, 
-                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
-                               InstrStage<1, [A9_LSPipe], 0>,
+                               InstrStage<1, [A9_Pipe1], 0>,
+                               InstrStage<1, [A9_LSPipe]>,
                                InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
   //
   // VLD3
@@ -353,9 +397,8 @@ def CortexA9Itineraries : ProcessorItineraries<
   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Issue], 0>, 
-                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
-                               InstrStage<1, [A9_LSPipe], 0>,
+                               InstrStage<1, [A9_Pipe1], 0>,
+                               InstrStage<1, [A9_LSPipe]>,
                                InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
   //
   // VLD4
@@ -363,9 +406,8 @@ def CortexA9Itineraries : ProcessorItineraries<
   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Issue], 0>, 
-                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
-                               InstrStage<1, [A9_LSPipe], 0>,
+                               InstrStage<1, [A9_Pipe1], 0>,
+                               InstrStage<1, [A9_LSPipe]>,
                                InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
   //
   // VST
@@ -373,9 +415,8 @@ def CortexA9Itineraries : ProcessorItineraries<
   InstrItinData<IIC_VST,      [InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Issue], 0>, 
-                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
-                               InstrStage<1, [A9_LSPipe], 0>,
+                               InstrStage<1, [A9_Pipe1], 0>,
+                               InstrStage<1, [A9_LSPipe]>,
                                InstrStage<1, [A9_NPipe]>]>,
   //
   // Double-register Integer Unary
@@ -800,5 +841,5 @@ def CortexA9Itineraries : ProcessorItineraries<
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                                InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+                              InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
 ]>;