R600: Add carry and borrow instructions. Use them to implement UADDO/USUBO
[oota-llvm.git] / lib / Target / R600 / SIFoldOperands.cpp
index ddb285213d2a58af2c4d585a5ff2c28609bef817..7ba5a6d7c3830a18d3da8f020cb3c76bcb457201 100644 (file)
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 
 #define DEBUG_TYPE "si-fold-operands"
@@ -56,10 +57,16 @@ struct FoldCandidate {
   uint64_t ImmToFold;
 
   FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
-      UseMI(MI), UseOpNo(OpNo), OpToFold(FoldOp), ImmToFold(0) { }
-
-  FoldCandidate(MachineInstr *MI, unsigned OpNo, uint64_t Imm) :
-      UseMI(MI), UseOpNo(OpNo), OpToFold(nullptr), ImmToFold(Imm) { }
+                UseMI(MI), UseOpNo(OpNo) {
+
+    if (FoldOp->isImm()) {
+      OpToFold = nullptr;
+      ImmToFold = FoldOp->getImm();
+    } else {
+      assert(FoldOp->isReg());
+      OpToFold = FoldOp;
+    }
+  }
 
   bool isImm() const {
     return !OpToFold;
@@ -119,6 +126,35 @@ static bool updateOperand(FoldCandidate &Fold,
   return false;
 }
 
+static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
+                             MachineInstr *MI, unsigned OpNo,
+                             MachineOperand *OpToFold,
+                             const SIInstrInfo *TII) {
+  if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {
+    // Operand is not legal, so try to commute the instruction to
+    // see if this makes it possible to fold.
+    unsigned CommuteIdx0;
+    unsigned CommuteIdx1;
+    bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);
+
+    if (CanCommute) {
+      if (CommuteIdx0 == OpNo)
+        OpNo = CommuteIdx1;
+      else if (CommuteIdx1 == OpNo)
+        OpNo = CommuteIdx0;
+    }
+
+    if (!CanCommute || !TII->commuteInstruction(MI))
+      return false;
+
+    if (!TII->isOperandLegal(MI, OpNo, OpToFold))
+      return false;
+  }
+
+  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
+  return true;
+}
+
 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const SIInstrInfo *TII =
@@ -137,7 +173,21 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
       if (!isSafeToFold(MI.getOpcode()))
         continue;
 
+      unsigned OpSize = TII->getOpSize(MI, 1);
       MachineOperand &OpToFold = MI.getOperand(1);
+      bool FoldingImm = OpToFold.isImm();
+
+      // FIXME: We could also be folding things like FrameIndexes and
+      // TargetIndexes.
+      if (!FoldingImm && !OpToFold.isReg())
+        continue;
+
+      // Folding immediates with more than one use will increase program size.
+      // FIXME: This will also reduce register usage, which may be better
+      // in some cases.  A better heuristic is needed.
+      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
+          !MRI.hasOneUse(MI.getOperand(0).getReg()))
+        continue;
 
       // FIXME: Fold operands with subregs.
       if (OpToFold.isReg() &&
@@ -154,21 +204,21 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
         const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());
 
         // FIXME: Fold operands with subregs.
-        if (UseOp.isReg() && UseOp.getSubReg() && OpToFold.isReg()) {
+        if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
+            UseOp.isImplicit())) {
           continue;
         }
 
-        bool FoldingImm = OpToFold.isImm() || OpToFold.isFPImm();
         APInt Imm;
 
         if (FoldingImm) {
-          const TargetRegisterClass *UseRC = MRI.getRegClass(UseOp.getReg());
+          unsigned UseReg = UseOp.getReg();
+          const TargetRegisterClass *UseRC
+            = TargetRegisterInfo::isVirtualRegister(UseReg) ?
+            MRI.getRegClass(UseReg) :
+            TRI.getRegClass(UseReg);
 
-          if (OpToFold.isFPImm()) {
-            Imm = OpToFold.getFPImm()->getValueAPF().bitcastToAPInt();
-          } else {
-            Imm = APInt(64, OpToFold.getImm());
-          }
+          Imm = APInt(64, OpToFold.getImm());
 
           // Split 64-bit constants into 32-bits for folding.
           if (UseOp.getSubReg()) {
@@ -186,8 +236,13 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
           // In order to fold immediates into copies, we need to change the
           // copy to a MOV.
           if (UseMI->getOpcode() == AMDGPU::COPY) {
-            unsigned MovOp = TII->getMovOpcode(
-                MRI.getRegClass(UseMI->getOperand(0).getReg()));
+            unsigned DestReg = UseMI->getOperand(0).getReg();
+            const TargetRegisterClass *DestRC
+              = TargetRegisterInfo::isVirtualRegister(DestReg) ?
+              MRI.getRegClass(DestReg) :
+              TRI.getRegClass(DestReg);
+
+            unsigned MovOp = TII->getMovOpcode(DestRC);
             if (MovOp == AMDGPU::COPY)
               continue;
 
@@ -203,24 +258,13 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
             UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
           continue;
 
-
         if (FoldingImm) {
-          const MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
-          if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &ImmOp)) {
-            FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(),
-                               Imm.getSExtValue()));
-          }
-          continue;
-        }
-
-        // Normal substitution with registers
-        if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &OpToFold)) {
-          FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(), &OpToFold));
+          MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
+          tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
           continue;
         }
 
-        // FIXME: We could commute the instruction to create more opportunites
-        // for folding.  This will only be useful if we have 32-bit instructions.
+        tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);
 
         // FIXME: We could try to change the instruction from 64-bit to 32-bit
         // to enable more folding opportunites.  The shrink operands pass