R600/SI: Custom select 64-bit ADD

[oota-llvm.git] / lib / Target / R600 / SIInstrInfo.cpp
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp

index e293c3a22ae823ba737cf716a6254e96af6d71a0..a239fb92f35af9ca9851bf774c249400c53ae84a 100644 (file)
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -16,6 +16,7 @@
  #include "SIInstrInfo.h"
  #include "AMDGPUTargetMachine.h"
  #include "SIDefines.h"
+#include "SIMachineFunctionInfo.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/MC/MCInstrDesc.h"
@@ -172,7 +173,6 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
  }
  
  unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
-
    int NewOpc;
  
    // Try to map original to commuted opcode
@@ -186,6 +186,67 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
    return Opcode;
  }
  
+void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator MI,
+                                      unsigned SrcReg, bool isKill,
+                                      int FrameIndex,
+                                      const TargetRegisterClass *RC,
+                                      const TargetRegisterInfo *TRI) const {  // Spill SrcReg (class RC) before MI, keyed by FrameIndex.
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+  DebugLoc DL = MBB.findDebugLoc(MI);
+  unsigned KillFlag = isKill ? RegState::Kill : 0;  // Applied to SrcReg on the V_WRITELANE_B32 below.
+
+  if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {  // NOTE(review): loadRegFromStackSlot tests SReg_32RegClass here - confirm the difference is intended.
+    unsigned Lane = MFI->SpillTracker.getNextLane(MRI);  // Lane index handed out by the spill tracker.
+    BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
+            MFI->SpillTracker.LaneVGPR)
+            .addReg(SrcReg, KillFlag)
+            .addImm(Lane);
+    MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
+                                    Lane);  // Record the (VGPR, lane) pair under FrameIndex.
+  } else {  // No common subclass: split into RC->getSize() / 4 pieces.
+    for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
+      unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+      BuildMI(MBB, MI, MBB.findDebugLoc(MI), get(AMDGPU::COPY), SubReg)
+              .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));  // Copy channel i of SrcReg into SubReg.
+      storeRegToStackSlot(MBB, MI, SubReg, isKill, FrameIndex + i,
+                          &AMDGPU::SReg_32RegClass, TRI);  // Recurse: piece i is stored under FrameIndex + i.
+    }
+  }
+}
+
+void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MI,
+                                       unsigned DestReg, int FrameIndex,
+                                       const TargetRegisterClass *RC,
+                                       const TargetRegisterInfo *TRI) const {  // Reload DestReg (class RC) before MI from the spill keyed by FrameIndex.
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+  DebugLoc DL = MBB.findDebugLoc(MI);
+  if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) {
+     SIMachineFunctionInfo::SpilledReg Spill =
+        MFI->SpillTracker.getSpilledReg(FrameIndex);  // Look up the VGPR/lane recorded for FrameIndex.
+    assert(Spill.VGPR);  // Presumably a zero VGPR means nothing was recorded for this index - confirm.
+    BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg)
+            .addReg(Spill.VGPR)
+            .addImm(Spill.Lane);
+  } else {  // No common subclass: reload RC->getSize() / 4 pieces.
+    for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
+      unsigned Flags = RegState::Define;
+      if (i == 0) {
+        Flags |= RegState::Undef;  // Only the first piece's def of DestReg also carries Undef.
+      }
+      unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+      loadRegFromStackSlot(MBB, MI, SubReg, FrameIndex + i,
+                           &AMDGPU::SReg_32RegClass, TRI);  // Recurse: piece i is read from FrameIndex + i.
+      BuildMI(MBB, MI, DL, get(AMDGPU::COPY))
+              .addReg(DestReg, Flags, RI.getSubRegFromChannel(i))
+              .addReg(SubReg);  // Define channel i of DestReg from the reloaded piece.
+    }
+  }
+}
+
  MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
                                                bool NewMI) const {
  
@@ -214,8 +275,10 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
        return 0;
  
      unsigned Reg = MI->getOperand(1).getReg();
+    unsigned SubReg = MI->getOperand(1).getSubReg();
      MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
      MI->getOperand(2).ChangeToRegister(Reg, false);
+    MI->getOperand(2).setSubReg(SubReg);
    } else {
      MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
    }
@@ -250,6 +313,18 @@ SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
    return RC != &AMDGPU::EXECRegRegClass;
  }
  
+namespace llvm {
+namespace AMDGPU {
+// Helper function generated by tablegen.  We are wrapping this with
+// an SIInstrInfo function that returns bool rather than int.
+int isDS(uint16_t Opcode);
+}
+}
+
+bool SIInstrInfo::isDS(uint16_t Opcode) const {
+  return ::AMDGPU::isDS(Opcode) != -1;  // True unless the int-returning helper yields -1.
+}
+
  int SIInstrInfo::isMIMG(uint16_t Opcode) const {
    return get(Opcode).TSFlags & SIInstrFlags::MIMG;
  }
@@ -368,12 +443,16 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
    return true;
  }
  
-unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
+unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
    switch (MI.getOpcode()) {
    default: return AMDGPU::INSTRUCTION_LIST_END;
    case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
    case AMDGPU::COPY: return AMDGPU::COPY;
    case AMDGPU::PHI: return AMDGPU::PHI;
+  case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
+  case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
+  case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
+  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
    case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
    case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
    case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
@@ -419,10 +498,11 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
    if (MO.isReg()) {
      Opcode = AMDGPU::COPY;
    } else if (RI.isSGPRClass(RC)) {
-      Opcode = AMDGPU::S_MOV_B32;
+    Opcode = AMDGPU::S_MOV_B32;
    }
  
-  unsigned Reg = MRI.createVirtualRegister(RI.getRegClass(RCID));
+  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
+  unsigned Reg = MRI.createVirtualRegister(VRC);
    BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
            Reg).addOperand(MO);
    MO.ChangeToRegister(Reg, false);
@@ -439,8 +519,26 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  
    // Legalize VOP2
    if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
+    MachineOperand &Src0 = MI->getOperand(Src0Idx);
      MachineOperand &Src1 = MI->getOperand(Src1Idx);
-    // Legalize VOP2 instructions where src1 is not a VGPR.
+
+    // If the instruction implicitly reads VCC, we can't have any SGPR operands,
+    // so move any.
+    bool ReadsVCC = MI->readsRegister(AMDGPU::VCC, &RI);
+    if (ReadsVCC && Src0.isReg() &&
+        RI.isSGPRClass(MRI.getRegClass(Src0.getReg()))) {
+      legalizeOpWithMove(MI, Src0Idx);
+      return;
+    }
+
+    if (ReadsVCC && Src1.isReg() &&
+        RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
+      legalizeOpWithMove(MI, Src1Idx);
+      return;
+    }
+
+    // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must
+    // be the first operand, and there can only be one.
      if (Src1.isImm() || Src1.isFPImm() ||
          (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
        if (MI->isCommutable()) {
@@ -451,6 +549,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
      }
    }
  
+  // XXX - Do any VOP3 instructions read VCC?
    // Legalize VOP3
    if (isVOP3(MI->getOpcode())) {
      int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
@@ -465,6 +564,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
          if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
            continue; // VGPRs are legal
  
+        assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");
+
          if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
            SGPRReg = MO.getReg();
            // We can use one SGPR in each VOP3 instruction.
@@ -542,18 +643,27 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
      const MCInstrDesc &NewDesc = get(NewOpcode);
      Inst->setDesc(NewDesc);
  
+    // Remove any references to SCC. Vector instructions can't read from it, and
+    // we're just about to add the implicit use / defs of VCC, and we don't want
+    // both.
+    for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
+      MachineOperand &Op = Inst->getOperand(i);
+      if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
+        Inst->RemoveOperand(i);
+    }
+
      // Add the implict and explicit register definitions.
      if (NewDesc.ImplicitUses) {
        for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
-        Inst->addOperand(MachineOperand::CreateReg(NewDesc.ImplicitUses[i],
-                                                   false, true));
+        unsigned Reg = NewDesc.ImplicitUses[i];
+        Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
        }
      }
  
      if (NewDesc.ImplicitDefs) {
        for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
-        Inst->addOperand(MachineOperand::CreateReg(NewDesc.ImplicitDefs[i],
-                                                   true, true));
+        unsigned Reg = NewDesc.ImplicitDefs[i];
+        Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
        }
      }