Inflate register classes after coalescing.

author Jakob Stoklund Olesen <stoklund@2pi.dk>

Tue, 9 Aug 2011 18:19:41 +0000 (18:19 +0000)

committer Jakob Stoklund Olesen <stoklund@2pi.dk>

Tue, 9 Aug 2011 18:19:41 +0000 (18:19 +0000)
author Jakob Stoklund Olesen <stoklund@2pi.dk>
Tue, 9 Aug 2011 18:19:41 +0000 (18:19 +0000)
committer Jakob Stoklund Olesen <stoklund@2pi.dk>
Tue, 9 Aug 2011 18:19:41 +0000 (18:19 +0000)
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp

index d2087f9beea7cd8119964cf6754edf63d63ea839..c07970d69bf7d9f741d0cd40d598e3ff665eb3b0 100644 (file)
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -55,6 +55,7 @@ STATISTIC(numExtends  , "Number of copies extended");
  STATISTIC(NumReMats   , "Number of instructions re-materialized");
  STATISTIC(numPeep     , "Number of identity moves eliminated after coalescing");
  STATISTIC(numAborts   , "Number of times interval joining aborted");
+STATISTIC(NumInflated , "Number of register classes inflated");
  
  static cl::opt<bool>
  EnableJoining("join-liveintervals",
@@ -1852,7 +1853,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
  
    // Perform a final pass over the instructions and compute spill weights
    // and remove identity moves.
-  SmallVector<unsigned, 4> DeadDefs;
+  SmallVector<unsigned, 4> DeadDefs, InflateRegs;
    for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
         mbbi != mbbe; ++mbbi) {
      MachineBasicBlock* mbb = mbbi;
@@ -1864,6 +1865,16 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
          bool DoDelete = true;
          assert(MI->isCopyLike() && "Unrecognized copy instruction");
          unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+        unsigned DstReg = MI->getOperand(0).getReg();
+
+        // Collect candidates for register class inflation.
+        if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+            RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg)))
+          InflateRegs.push_back(SrcReg);
+        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+            RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg)))
+          InflateRegs.push_back(DstReg);
+
          if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
              MI->getNumOperands() > 2)
            // Do not delete extract_subreg, insert_subreg of physical
@@ -1905,8 +1916,12 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
            unsigned Reg = MO.getReg();
            if (!Reg)
              continue;
-          if (TargetRegisterInfo::isVirtualRegister(Reg))
+          if (TargetRegisterInfo::isVirtualRegister(Reg)) {
              DeadDefs.push_back(Reg);
+            // Remat may also enable register class inflation.
+            if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)))
+              InflateRegs.push_back(Reg);
+          }
            if (MO.isDead())
              continue;
            if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
@@ -1954,6 +1969,24 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
      }
    }
  
+  // After deleting a lot of copies, register classes may be less constrained.
+  // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
+  // DPR inflation.
+  array_pod_sort(InflateRegs.begin(), InflateRegs.end());
+  InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
+                    InflateRegs.end());
+  DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
+  for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
+    unsigned Reg = InflateRegs[i];
+    if (MRI->reg_nodbg_empty(Reg))
+      continue;
+    if (MRI->recomputeRegClass(Reg, *TM)) {
+      DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
+                   << MRI->getRegClass(Reg)->getName() << '\n');
+      ++NumInflated;
+    }
+  }
+
    DEBUG(dump());
    DEBUG(LDV->dump());
    if (VerifyCoalescing)
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll

index 51efe51bf1525acb212e472c023addd17ff75f96..45c322dce8b969c61790b856859b84b5c8f4c970 100644 (file)
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -22,6 +22,8 @@ declare float @fabsf(float)
  ; NFP0:        vabs.f32        s1, s1
  
  ; CORTEXA8: test:
-; CORTEXA8:    vabs.f32        d1, d1
+; CORTEXA8:     vadd.f32        [[D1:d[0-9]+]]
+; CORTEXA8:    vabs.f32        {{d[0-9]+}}, [[D1]]
+
  ; CORTEXA9: test:
  ; CORTEXA9:    vabs.f32        s{{.}}, s{{.}}
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll

index 86c06f1ddd9ec03a86e81065c45ed69fbb5aa918..7002cecf364010b2786a03ea846756da35705d53 100644 (file)
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -7,7 +7,8 @@ define i32 @test1(float %a, float %b) {
  ; VFP2: test1:
  ; VFP2: vcvt.s32.f32 s{{.}}, s{{.}}
  ; NEON: test1:
-; NEON: vcvt.s32.f32 d0, d0
+; NEON: vadd.f32 [[D0:d[0-9]+]]
+; NEON: vcvt.s32.f32 d0, [[D0]]
  entry:
          %0 = fadd float %a, %b
          %1 = fptosi float %0 to i32
@@ -18,7 +19,8 @@ define i32 @test2(float %a, float %b) {
  ; VFP2: test2:
  ; VFP2: vcvt.u32.f32 s{{.}}, s{{.}}
  ; NEON: test2:
-; NEON: vcvt.u32.f32 d0, d0
+; NEON: vadd.f32 [[D0:d[0-9]+]]
+; NEON: vcvt.u32.f32 d0, [[D0]]
  entry:
          %0 = fadd float %a, %b
          %1 = fptoui float %0 to i32
author	Jakob Stoklund Olesen <stoklund@2pi.dk>
	Tue, 9 Aug 2011 18:19:41 +0000 (18:19 +0000)
committer	Jakob Stoklund Olesen <stoklund@2pi.dk>
	Tue, 9 Aug 2011 18:19:41 +0000 (18:19 +0000)
lib/CodeGen/RegisterCoalescer.cpp		patch \| blob \| history
test/CodeGen/ARM/fabss.ll		patch \| blob \| history
test/CodeGen/ARM/fp_convert.ll		patch \| blob \| history