NEON scheduling info fix: register-to-register vmov (`vmov reg, reg`) is a single-cycle instruction.

author Evan Cheng <evan.cheng@apple.com>

Fri, 1 Oct 2010 20:50:58 +0000 (20:50 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Fri, 1 Oct 2010 20:50:58 +0000 (20:50 +0000)
author Evan Cheng <evan.cheng@apple.com>
Fri, 1 Oct 2010 20:50:58 +0000 (20:50 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Fri, 1 Oct 2010 20:50:58 +0000 (20:50 +0000)
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td

index 1a4a8847d9ef7fef32ae3fb3da447fc035e700b9..93e9e6607754b4deed28c16de2b8169b38570b04 100644 (file)
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -3177,7 +3177,7 @@ def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                            v2i64, v2i32, NEONvshlli>;
  
  //   VSHRN    : Vector Shift Right and Narrow
-defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
+defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VMOVN, "vshrn", "i",
                             NEONvshrn>;
  
  //   VRSHL    : Vector Rounding Shift
@@ -3284,7 +3284,7 @@ class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
          [(set DPR:$dst, (Ty (vnegd DPR:$src)))]>;
  class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
    : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
-        IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
+        IIC_VSHLiQ, OpcodeStr, Dt, "$dst, $src", "",
          [(set QPR:$dst, (Ty (vnegq QPR:$src)))]>;
  
  //   VNEG     : Vector Negate (integer)
@@ -3349,9 +3349,9 @@ def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
  
  let neverHasSideEffects = 1 in {
  def  VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
-                     N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
+                     N3RegFrm, IIC_VMOV, "vmov", "$dst, $src", "", []>;
  def  VMOVQ    : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
-                     N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
+                     N3RegFrm, IIC_VMOV, "vmov", "$dst, $src", "", []>;
  
  // Pseudo vector move instructions for QQ and QQQQ registers. This should
  // be expanded after register allocation is completed.
@@ -3577,7 +3577,7 @@ class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
  class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy>
    : NVDupLane<op19_16, 1, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-              IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
+              IIC_VMOVQ, OpcodeStr, Dt, "$dst, $src[$lane]",
                [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src),
                                        imm:$lane)))]>;
  
@@ -3616,11 +3616,11 @@ def  VDUPfdf  : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0,
  
  def  VDUPfqf  : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
                      (outs QPR:$dst), (ins SPR:$src),
-                    IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
+                    IIC_VMOVQ, "vdup", "32", "$dst, ${src:lane}", "",
                      [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
  
  //   VMOVN    : Vector Narrowing Move
-defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
+defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                           "vmovn", "i", trunc>;
  //   VQMOVN   : Vector Saturating Narrowing Move
  defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
@@ -3684,7 +3684,7 @@ class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
          [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
  class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
    : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst),
-        (ins QPR:$src), IIC_VMOVD, 
+        (ins QPR:$src), IIC_VMOVQ, 
          OpcodeStr, Dt, "$dst, $src", "",
          [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;
  
@@ -3707,7 +3707,7 @@ class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
          [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
  class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
    : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst),
-        (ins QPR:$src), IIC_VMOVD, 
+        (ins QPR:$src), IIC_VMOVQ, 
          OpcodeStr, Dt, "$dst, $src", "",
          [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;
  
@@ -3726,7 +3726,7 @@ class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
          [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
  class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
    : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst),
-        (ins QPR:$src), IIC_VMOVD, 
+        (ins QPR:$src), IIC_VMOVQ, 
          OpcodeStr, Dt, "$dst, $src", "",
          [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;
  
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td

index 07bd0fdf287b0614bcf44505cd63620b48999164..ec7d29aac0381308a5603d66e5365941803ee3c9 100644 (file)
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -129,6 +129,7 @@ def IIC_VUNAD      : InstrItinClass;
  def IIC_VUNAQ      : InstrItinClass;
  def IIC_VBIND      : InstrItinClass;
  def IIC_VBINQ      : InstrItinClass;
+def IIC_VMOV       : InstrItinClass;
  def IIC_VMOVImm    : InstrItinClass;
  def IIC_VMOVD      : InstrItinClass;
  def IIC_VMOVQ      : InstrItinClass;
@@ -137,6 +138,7 @@ def IIC_VMOVID     : InstrItinClass;
  def IIC_VMOVISL    : InstrItinClass;
  def IIC_VMOVSI     : InstrItinClass;
  def IIC_VMOVDI     : InstrItinClass;
+def IIC_VMOVN      : InstrItinClass;
  def IIC_VPERMD     : InstrItinClass;
  def IIC_VPERMQ     : InstrItinClass;
  def IIC_VPERMQ3    : InstrItinClass;
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td

index ff2a673c26beeea6cff09abccbdc69e35859837e..d2e1df13f98f97037e186815b3cfeeac6cc42d5c 100644 (file)
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -487,6 +487,10 @@ def CortexA8Itineraries : ProcessorItineraries<
    InstrItinData<IIC_VBINQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
                                 InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
    //
+  // Move
+  InstrItinData<IIC_VMOV,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<1, [A8_NPipe]>], [1, 1]>,
+  //
    // Move Immediate
    InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
                                 InstrStage<1, [A8_NPipe]>], [3]>,
@@ -521,6 +525,10 @@ def CortexA8Itineraries : ProcessorItineraries<
    InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
                                 InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
    //
+  // Vector narrow move
+  InstrItinData<IIC_VMOVN   , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                               InstrStage<1, [A8_NPipe]>], [3, 1]>,
+  //
    // Double-register Permute
    InstrItinData<IIC_VPERMD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
                                 InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td

index 02058618ad0800eeb6ba2f28bdaba0d2ff170348..8acc172668f49a9b762208ae79acd4b71dce993f 100644 (file)
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -801,6 +801,14 @@ def CortexA9Itineraries : ProcessorItineraries<
                                 InstrStage<1, [A9_Pipe1]>,
                                 InstrStage<4, [A9_MUX0, A9_NPipe]>],
                                [9, 3, 2, 1]>,
+
+  //
+  // Move
+  InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe1]>,
+                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                              [1,1]>,
    //
    // Move Immediate
    InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_DRegsN],   0, Required>,
@@ -819,14 +827,12 @@ def CortexA9Itineraries : ProcessorItineraries<
                                [2, 1]>,
    //
    // Quad-register Permute Move
-  // Result written in N2, but that is relative to the last cycle of multicycle,
-  // so we use 3 for those cases
    InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
    // FIXME: all latencies are arbitrary, no information is available
-                               InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
                                 InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_MUX0, A9_NPipe]>],
-                              [3, 1]>,
+                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                              [2, 1]>,
    //
    // Integer to Single-precision Move
    InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
@@ -869,6 +875,14 @@ def CortexA9Itineraries : ProcessorItineraries<
                                [3, 1, 1]>,
  
    //
+  // Vector narrow move
+  InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe1]>,
+                               InstrStage<1, [A9_MUX0, A9_NPipe]>],
+                              [3, 1]>,
+  //
    // Double-register FP Unary
    InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
                                 // Extra latency cycles since wbck is 6 cycles
author	Evan Cheng <evan.cheng@apple.com>
	Fri, 1 Oct 2010 20:50:58 +0000 (20:50 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Fri, 1 Oct 2010 20:50:58 +0000 (20:50 +0000)
lib/Target/ARM/ARMInstrNEON.td		patch | blob | history
lib/Target/ARM/ARMSchedule.td		patch | blob | history
lib/Target/ARM/ARMScheduleA8.td		patch | blob | history
lib/Target/ARM/ARMScheduleA9.td		patch | blob | history