Revert "Make NumMicroOps a variable in the subtarget's instruction itinerary."
author Andrew Trick <atrick@apple.com>
Fri, 29 Jun 2012 07:10:41 +0000 (07:10 +0000)
committer Andrew Trick <atrick@apple.com>
Fri, 29 Jun 2012 07:10:41 +0000 (07:10 +0000)
This reverts commit r159406. I noticed a performance regression so I'll back out for now.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159411 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/MC/MCInstrItineraries.h
include/llvm/Target/TargetInstrInfo.h
include/llvm/Target/TargetSchedule.td
lib/CodeGen/TargetInstrInfoImpl.cpp
lib/Target/ARM/ARMBaseInstrInfo.cpp
lib/Target/ARM/ARMSchedule.td
lib/Target/ARM/ARMScheduleA8.td
lib/Target/ARM/ARMScheduleA9.td
utils/TableGen/SubtargetEmitter.cpp

index d8587068ae5019d4bdbd213ea4774b2e2e133902..1003fb586cef9f8bfacb6116c4ae925c2c9d0aa1 100644 (file)
@@ -95,7 +95,7 @@ struct InstrStage {
 /// operands are read and written.
 ///
 struct InstrItinerary {
-  int      NumMicroOps;        ///< # of micro-ops, -1 means it's variable
+  unsigned NumMicroOps;        ///< # of micro-ops, 0 means it's variable
   unsigned FirstStage;         ///< Index of first stage in itinerary
   unsigned LastStage;          ///< Index of last + 1 stage in itinerary
   unsigned FirstOperandCycle;  ///< Index of first operand rd/wr
@@ -323,6 +323,7 @@ public:
   }
 };
 
+
 } // End llvm namespace
 
 #endif
index 87f1fe5b0b26dbd6bf09061074adcb1e38e9e48d..4e731398813e5cb396765116e22e06171da8662a 100644 (file)
@@ -648,9 +648,7 @@ public:
   }
 
   /// getNumMicroOps - Return the number of u-operations the given machine
-  /// instruction will be decoded to on the target cpu. The itinerary's
-  /// IssueWidth is the number of microops that can be dispatched each
-  /// cycle. An instruction with zero microops takes no dispatch resources.
+  /// instruction will be decoded to on the target cpu.
   virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
                                   const MachineInstr *MI) const = 0;
 
index caa5a84c83df9f7ed454bbfea50469d8e6197b95..e22e67cdac298f1c70e11a1393dd6b5a47b9ceff 100644 (file)
@@ -73,20 +73,20 @@ class InstrStage<int cycles, list<FuncUnit> units,
 // across all chip sets.  Thus a new chip set can be added without modifying
 // instruction information.
 //
-class InstrItinClass;
+// NumMicroOps represents the number of micro-operations that each instruction
+// in the class is decoded into. If the number is zero, then it means the
+// instruction can decode into a variable number of micro-ops and it must be
+// determined dynamically.
+//
+class InstrItinClass<int ops = 1> {
+  int NumMicroOps = ops;
+}
 def NoItinerary : InstrItinClass;
 
 //===----------------------------------------------------------------------===//
 // Instruction itinerary data - These values provide a runtime map of an
 // instruction itinerary class (name) to its itinerary data.
 //
-// NumMicroOps represents the number of micro-operations that each instruction
-// in the class are decoded to. If the number is zero, then it means the
-// instruction can decode into variable number of micro-ops and it must be
-// determined dynamically. This directly relates to the itineraries
-// global IssueWidth property, which constrains the number of microops
-// that can issue per cycle.
-//
 // OperandCycles are optional "cycle counts". They specify the cycle after
 // instruction issue the values which correspond to specific operand indices
 // are defined or read. Bypasses are optional "pipeline forwarding pathes", if
@@ -106,9 +106,8 @@ def NoItinerary : InstrItinClass;
 // is reduced by 1.
 class InstrItinData<InstrItinClass Class, list<InstrStage> stages,
                     list<int> operandcycles = [],
-                    list<Bypass> bypasses = [], int uops = 1> {
+                    list<Bypass> bypasses = []> {
   InstrItinClass TheClass = Class;
-  int NumMicroOps = uops;
   list<InstrStage> Stages = stages;
   list<int> OperandCycles = operandcycles;
   list<Bypass> Bypasses = bypasses;
index 54be88a8bb06cedf31a5b8b191583c4dfb0d055f..7af08f591f13621439716647d8fa68c458b94a08 100644 (file)
@@ -560,8 +560,8 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
     return 1;
 
   unsigned Class = MI->getDesc().getSchedClass();
-  int UOps = ItinData->Itineraries[Class].NumMicroOps;
-  if (UOps >= 0)
+  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+  if (UOps)
     return UOps;
 
   // The # of u-ops is dynamically determined. The specific target should
index b3fef2909316aaa0bf9e9029e037ad59edd06dce..3a180dfa2793041a2a36e0c5a84f5fac314a58f7 100644 (file)
@@ -2176,9 +2176,9 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
 
   const MCInstrDesc &Desc = MI->getDesc();
   unsigned Class = Desc.getSchedClass();
-  int ItinUOps = ItinData->Itineraries[Class].NumMicroOps;
-  if (ItinUOps >= 0)
-    return ItinUOps;
+  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+  if (UOps)
+    return UOps;
 
   unsigned Opc = MI->getOpcode();
   switch (Opc) {
@@ -2252,19 +2252,19 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
         return 2;
       // 4 registers would be issued: 2, 2.
       // 5 registers would be issued: 2, 2, 1.
-      int A8UOps = (NumRegs / 2);
+      UOps = (NumRegs / 2);
       if (NumRegs % 2)
-        ++A8UOps;
-      return A8UOps;
+        ++UOps;
+      return UOps;
     } else if (Subtarget.isCortexA9()) {
-      int A9UOps = (NumRegs / 2);
+      UOps = (NumRegs / 2);
       // If there are odd number of registers or if it's not 64-bit aligned,
       // then it takes an extra AGU (Address Generation Unit) cycle.
       if ((NumRegs % 2) ||
           !MI->hasOneMemOperand() ||
           (*MI->memoperands_begin())->getAlignment() < 8)
-        ++A9UOps;
-      return A9UOps;
+        ++UOps;
+      return UOps;
     } else {
       // Assume the worst.
       return NumRegs;
index 81d2fa37c2d1e991bcf7b07c41f9fe195be00810..b9a07f1ee68ee8a08682f70114705f3e48c338fd 100644 (file)
@@ -70,11 +70,11 @@ def IIC_iLoad_bh_siu : InstrItinClass;
 def IIC_iLoad_d_i  : InstrItinClass;
 def IIC_iLoad_d_r  : InstrItinClass;
 def IIC_iLoad_d_ru : InstrItinClass;
-def IIC_iLoad_m    : InstrItinClass;
-def IIC_iLoad_mu   : InstrItinClass;
-def IIC_iLoad_mBr  : InstrItinClass;
-def IIC_iPop       : InstrItinClass;
-def IIC_iPop_Br    : InstrItinClass;
+def IIC_iLoad_m    : InstrItinClass<0>;  // micro-coded
+def IIC_iLoad_mu   : InstrItinClass<0>;  // micro-coded
+def IIC_iLoad_mBr  : InstrItinClass<0>;  // micro-coded
+def IIC_iPop       : InstrItinClass<0>;  // micro-coded
+def IIC_iPop_Br    : InstrItinClass<0>;  // micro-coded
 def IIC_iLoadiALU  : InstrItinClass;
 def IIC_iStore_i   : InstrItinClass;
 def IIC_iStore_r   : InstrItinClass;
@@ -91,8 +91,8 @@ def IIC_iStore_bh_siu : InstrItinClass;
 def IIC_iStore_d_i   : InstrItinClass;
 def IIC_iStore_d_r   : InstrItinClass;
 def IIC_iStore_d_ru  : InstrItinClass;
-def IIC_iStore_m   : InstrItinClass;
-def IIC_iStore_mu  : InstrItinClass;
+def IIC_iStore_m   : InstrItinClass<0>;  // micro-coded
+def IIC_iStore_mu  : InstrItinClass<0>;  // micro-coded
 def IIC_Preload    : InstrItinClass;
 def IIC_Br         : InstrItinClass;
 def IIC_fpSTAT     : InstrItinClass;
@@ -126,12 +126,12 @@ def IIC_fpSQRT32   : InstrItinClass;
 def IIC_fpSQRT64   : InstrItinClass;
 def IIC_fpLoad32   : InstrItinClass;
 def IIC_fpLoad64   : InstrItinClass;
-def IIC_fpLoad_m   : InstrItinClass;
-def IIC_fpLoad_mu  : InstrItinClass;
+def IIC_fpLoad_m   : InstrItinClass<0>;  // micro-coded
+def IIC_fpLoad_mu  : InstrItinClass<0>;  // micro-coded
 def IIC_fpStore32  : InstrItinClass;
 def IIC_fpStore64  : InstrItinClass;
-def IIC_fpStore_m  : InstrItinClass;
-def IIC_fpStore_mu : InstrItinClass;
+def IIC_fpStore_m  : InstrItinClass<0>;  // micro-coded
+def IIC_fpStore_mu : InstrItinClass<0>;  // micro-coded
 def IIC_VLD1       : InstrItinClass;
 def IIC_VLD1x2     : InstrItinClass;
 def IIC_VLD1x3     : InstrItinClass;
index 61de00a2086af3227e1dede9f279910713a57dde..eb1083ca23f3a2329242e7747e8d06ae43674775 100644 (file)
@@ -155,30 +155,28 @@ def CortexA8Itineraries : MultiIssueItineraries<
   // Load multiple, def is the 5th operand. Pipeline 0 only.
   // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
   InstrItinData<IIC_iLoad_m  , [InstrStage<2, [A8_Pipe0], 0>,
-                                InstrStage<2, [A8_LSPipe]>],
-                [1, 1, 1, 1, 3], [], -1>, // dynamic uops
+                                InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>,
   //
   // Load multiple + update, defs are the 1st and 5th operands.
   InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
-                                InstrStage<3, [A8_LSPipe]>],
-                [2, 1, 1, 1, 3], [], -1>, // dynamic uops
+                                InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>,
   //
   // Load multiple plus branch
   InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
                                 InstrStage<3, [A8_LSPipe]>,
                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
-                              [1, 2, 1, 1, 3], [], -1>, // dynamic uops
+                               [1, 2, 1, 1, 3]>,
   //
   // Pop, def is the 3rd operand.
   InstrItinData<IIC_iPop  ,    [InstrStage<3, [A8_Pipe0], 0>,
-                                InstrStage<3, [A8_LSPipe]>],
-                [1, 1, 3], [], -1>, // dynamic uops
+                                InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>,
   //
   // Push, def is the 3th operand.
   InstrItinData<IIC_iPop_Br,   [InstrStage<3, [A8_Pipe0], 0>,
                                 InstrStage<3, [A8_LSPipe]>,
                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
-                               [1, 1, 3], [], -1>, // dynamic uops
+                               [1, 1, 3]>,
+
   //
   // iLoadi + iALUr for t2LDRpci_pic.
   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
@@ -233,13 +231,12 @@ def CortexA8Itineraries : MultiIssueItineraries<
   // Store multiple. Pipeline 0 only.
   // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
   InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
-                                InstrStage<2, [A8_LSPipe]>],
-                [], [], -1>, // dynamic uops
+                                InstrStage<2, [A8_LSPipe]>]>,
   //
   // Store multiple + update
   InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
-                                InstrStage<2, [A8_LSPipe]>],
-                [2], [], -1>, // dynamic uops
+                                InstrStage<2, [A8_LSPipe]>], [2]>,
+
   //
   // Preload
   InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
@@ -400,16 +397,14 @@ def CortexA8Itineraries : MultiIssueItineraries<
                                InstrStage<1, [A8_NLSPipe], 0>,
                                InstrStage<1, [A8_LSPipe]>,
                                InstrStage<1, [A8_NLSPipe], 0>,
-                               InstrStage<1, [A8_LSPipe]>],
-                [1, 1, 1, 2], [], -1>, // dynamic uops
+                               InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>,
   //
   // FP Load Multiple + update
   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                InstrStage<1, [A8_NLSPipe], 0>,
                                InstrStage<1, [A8_LSPipe]>,
                                InstrStage<1, [A8_NLSPipe], 0>,
-                               InstrStage<1, [A8_LSPipe]>],
-                [2, 1, 1, 1, 2], [], -1>, // dynamic uops
+                               InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>,
   //
   // Single-precision FP Store
   InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
@@ -428,16 +423,15 @@ def CortexA8Itineraries : MultiIssueItineraries<
                                InstrStage<1, [A8_NLSPipe], 0>,
                                InstrStage<1, [A8_LSPipe]>,
                                InstrStage<1, [A8_NLSPipe], 0>,
-                               InstrStage<1, [A8_LSPipe]>],
-                [1, 1, 1, 1], [], -1>, // dynamic uops
+                               InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>,
   //
   // FP Store Multiple + update
   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                                 InstrStage<1, [A8_NLSPipe], 0>,
                                 InstrStage<1, [A8_LSPipe]>,
                                 InstrStage<1, [A8_NLSPipe], 0>,
-                                InstrStage<1, [A8_LSPipe]>],
-                [2, 1, 1, 1, 1], [], -1>, // dynamic uops
+                                InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>,
+
   // NEON
   // Issue through integer pipeline, and execute in NEON unit.
   //
index 1677ba6a98a30ee64f4f50e3099100c7e12148a4..a00577bf3d3029471bc923a60af504e5afe49760 100644 (file)
@@ -284,8 +284,7 @@ def CortexA9Itineraries : MultiIssueItineraries<
                                 InstrStage<2, [A9_AGU], 1>,
                                 InstrStage<2, [A9_LSUnit]>],
                                [1, 1, 1, 1, 3],
-                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
-                         -1>, // dynamic uops
+                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
   //
   // Load multiple + update, defs are the 1st and 5th operands.
   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -293,8 +292,7 @@ def CortexA9Itineraries : MultiIssueItineraries<
                                 InstrStage<2, [A9_AGU], 1>,
                                 InstrStage<2, [A9_LSUnit]>],
                                [2, 1, 1, 1, 3],
-                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
-                         -1>, // dynamic uops
+                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
   //
   // Load multiple plus branch
   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -303,8 +301,7 @@ def CortexA9Itineraries : MultiIssueItineraries<
                                 InstrStage<2, [A9_LSUnit]>,
                                 InstrStage<1, [A9_Branch]>],
                                [1, 2, 1, 1, 3],
-                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
-                         -1>, // dynamic uops
+                         [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
   //
   // Pop, def is the 3rd operand.
   InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -312,8 +309,7 @@ def CortexA9Itineraries : MultiIssueItineraries<
                                 InstrStage<2, [A9_AGU], 1>,
                                 InstrStage<2, [A9_LSUnit]>],
                                [1, 1, 3],
-                               [NoBypass, NoBypass, A9_LdBypass],
-                               -1>, // dynamic uops
+                               [NoBypass, NoBypass, A9_LdBypass]>,
   //
   // Pop + branch, def is the 3rd operand.
   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -322,8 +318,8 @@ def CortexA9Itineraries : MultiIssueItineraries<
                                 InstrStage<2, [A9_LSUnit]>,
                                 InstrStage<1, [A9_Branch]>],
                                [1, 1, 3],
-                               [NoBypass, NoBypass, A9_LdBypass],
-                               -1>, // dynamic uops
+                               [NoBypass, NoBypass, A9_LdBypass]>,
+
   //
   // iLoadi + iALUr for t2LDRpci_pic.
   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -417,15 +413,14 @@ def CortexA9Itineraries : MultiIssueItineraries<
   InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                 InstrStage<1, [A9_MUX0], 0>,
                                 InstrStage<1, [A9_AGU], 0>,
-                                InstrStage<2, [A9_LSUnit]>],
-                [], [], -1>, // dynamic uops
+                                InstrStage<2, [A9_LSUnit]>]>,
   //
   // Store multiple + update
   InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                 InstrStage<1, [A9_MUX0], 0>,
                                 InstrStage<1, [A9_AGU], 0>,
-                                InstrStage<2, [A9_LSUnit]>],
-                [2], [], -1>, // dynamic uops
+                                InstrStage<2, [A9_LSUnit]>], [2]>,
+
   //
   // Preload
   InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
@@ -722,8 +717,7 @@ def CortexA9Itineraries : MultiIssueItineraries<
                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                [1, 1, 1, 1], [], -1>, // dynamic uops
+                               InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
   //
   // FP Load Multiple + update
   // FIXME: assumes 2 doubles which requires 2 LS cycles.
@@ -732,8 +726,7 @@ def CortexA9Itineraries : MultiIssueItineraries<
                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                [2, 1, 1, 1], [], -1>, // dynamic uops
+                               InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
   //
   // Single-precision FP Store
   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -760,8 +753,7 @@ def CortexA9Itineraries : MultiIssueItineraries<
                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                InstrStage<1, [A9_NPipe], 0>,
-                               InstrStage<2, [A9_LSUnit]>],
-                [1, 1, 1, 1], [], -1>, // dynamic uops
+                               InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
   //
   // FP Store Multiple + update
   // FIXME: assumes 2 doubles which requires 2 LS cycles.
@@ -770,8 +762,7 @@ def CortexA9Itineraries : MultiIssueItineraries<
                                 InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                 InstrStage<1, [A9_NPipe], 0>,
-                                InstrStage<2, [A9_LSUnit]>],
-                [2, 1, 1, 1], [], -1>, // dynamic uops
+                                InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
   // NEON
   // VLD1
   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
index adfd416fc0c08cd8747718c08afdea4c58ffdfd7..59464d294056dcbc1fd3a87d21cd1735fc6a8d86 100644 (file)
@@ -498,7 +498,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
       unsigned Find = ItinClassesMap[Name];
 
       // Set up itinerary as location and location + stage count
-      unsigned NumUOps = ItinData->getValueAsInt("NumMicroOps");
+      unsigned NumUOps = ItinClassList[Find]->getValueAsInt("NumMicroOps");
       InstrItinerary Intinerary = { NumUOps, FindStage, FindStage + NStages,
                                     FindOperandCycle,
                                     FindOperandCycle + NOperandCycles};