Use move semantics if possible to construct ConstantRanges.

[oota-llvm.git] / lib / Target / R600 / R600InstrInfo.cpp
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp

index f267ee92cd96ca853b04dd779fc79d699a72461b..1a07b05c4bf3e7badc7ec044ffd213726c1783c7 100644 (file)
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -114,9 +114,7 @@ bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
  }
  
  bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
-  switch(Opcode) {
-    default: return false;
-  }
+  return false;
  }
  
  bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
@@ -136,6 +134,21 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
    return (TargetFlags & R600_InstFlag::ALU_INST);
  }
  
+bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
+  unsigned TargetFlags = get(Opcode).TSFlags;
+
+  return ((TargetFlags & R600_InstFlag::OP1) |
+          (TargetFlags & R600_InstFlag::OP2) |
+          (TargetFlags & R600_InstFlag::OP3));
+}
+
+bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
+  unsigned TargetFlags = get(Opcode).TSFlags;
+
+  return ((TargetFlags & R600_InstFlag::LDS_1A) |
+          (TargetFlags & R600_InstFlag::LDS_1A1D));
+}
+
  bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
    return (get(Opcode).TSFlags & R600_InstFlag::TRANS_ONLY);
  }
@@ -163,6 +176,16 @@ bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
           usesTextureCache(MI->getOpcode());
  }
  
+bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
+  switch (Opcode) {
+  case AMDGPU::KILLGT:
+  case AMDGPU::GROUP_BARRIER:
+    return true;
+  default:
+    return false;
+  }
+}
+
  SmallVector<std::pair<MachineOperand *, int64_t>, 3>
  R600InstrInfo::getSrcs(MachineInstr *MI) const {
    SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
@@ -225,8 +248,9 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const {
  
  std::vector<std::pair<int, unsigned> >
  R600InstrInfo::ExtractSrcs(MachineInstr *MI,
-                           const DenseMap<unsigned, unsigned> &PV)
-    const {
+                           const DenseMap<unsigned, unsigned> &PV,
+                           unsigned &ConstCount) const {
+  ConstCount = 0;
    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
    const std::pair<int, unsigned> DummyPair(-1, 0);
    std::vector<std::pair<int, unsigned> > Result;
@@ -234,15 +258,20 @@ R600InstrInfo::ExtractSrcs(MachineInstr *MI,
    for (unsigned n = Srcs.size(); i < n; ++i) {
      unsigned Reg = Srcs[i].first->getReg();
      unsigned Index = RI.getEncodingValue(Reg) & 0xff;
-    unsigned Chan = RI.getHWRegChan(Reg);
-    if (Index > 127) {
-      Result.push_back(DummyPair);
-      continue;
+    if (Reg == AMDGPU::OQAP) {
+      Result.push_back(std::pair<int, unsigned>(Index, 0));
      }
      if (PV.find(Reg) != PV.end()) {
+      // 255 is used to tells its a PS/PV reg
+      Result.push_back(std::pair<int, unsigned>(255, 0));
+      continue;
+    }
+    if (Index > 127) {
+      ConstCount++;
        Result.push_back(DummyPair);
        continue;
      }
+    unsigned Chan = RI.getHWRegChan(Reg);
      Result.push_back(std::pair<int, unsigned>(Index, Chan));
    }
    for (; i < 3; ++i)
@@ -254,15 +283,15 @@ static std::vector<std::pair<int, unsigned> >
  Swizzle(std::vector<std::pair<int, unsigned> > Src,
          R600InstrInfo::BankSwizzle Swz) {
    switch (Swz) {
-  case R600InstrInfo::ALU_VEC_012:
+  case R600InstrInfo::ALU_VEC_012_SCL_210:
      break;
-  case R600InstrInfo::ALU_VEC_021:
+  case R600InstrInfo::ALU_VEC_021_SCL_122:
      std::swap(Src[1], Src[2]);
      break;
-  case R600InstrInfo::ALU_VEC_102:
+  case R600InstrInfo::ALU_VEC_102_SCL_221:
      std::swap(Src[0], Src[1]);
      break;
-  case R600InstrInfo::ALU_VEC_120:
+  case R600InstrInfo::ALU_VEC_120_SCL_212:
      std::swap(Src[0], Src[1]);
      std::swap(Src[0], Src[2]);
      break;
@@ -277,66 +306,182 @@ Swizzle(std::vector<std::pair<int, unsigned> > Src,
    return Src;
  }
  
-static bool
-isLegal(const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+static unsigned
+getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
+  switch (Swz) {
+  case R600InstrInfo::ALU_VEC_012_SCL_210: {
+    unsigned Cycles[3] = { 2, 1, 0};
+    return Cycles[Op];
+  }
+  case R600InstrInfo::ALU_VEC_021_SCL_122: {
+    unsigned Cycles[3] = { 1, 2, 2};
+    return Cycles[Op];
+  }
+  case R600InstrInfo::ALU_VEC_120_SCL_212: {
+    unsigned Cycles[3] = { 2, 1, 2};
+    return Cycles[Op];
+  }
+  case R600InstrInfo::ALU_VEC_102_SCL_221: {
+    unsigned Cycles[3] = { 2, 2, 1};
+    return Cycles[Op];
+  }
+  default:
+    llvm_unreachable("Wrong Swizzle for Trans Slot");
+    return 0;
+  }
+}
+
+/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
+/// in the same Instruction Group while meeting read port limitations given a
+/// Swz swizzle sequence.
+unsigned  R600InstrInfo::isLegalUpTo(
+    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
      const std::vector<R600InstrInfo::BankSwizzle> &Swz,
-    unsigned CheckedSize) {
+    const std::vector<std::pair<int, unsigned> > &TransSrcs,
+    R600InstrInfo::BankSwizzle TransSwz) const {
    int Vector[4][3];
    memset(Vector, -1, sizeof(Vector));
-  for (unsigned i = 0; i < CheckedSize; i++) {
+  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
      const std::vector<std::pair<int, unsigned> > &Srcs =
          Swizzle(IGSrcs[i], Swz[i]);
      for (unsigned j = 0; j < 3; j++) {
        const std::pair<int, unsigned> &Src = Srcs[j];
-      if (Src.first < 0)
+      if (Src.first < 0 || Src.first == 255)
          continue;
+      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
+        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
+            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
+            // The value from output queue A (denoted by register OQAP) can
+            // only be fetched during the first cycle.
+            return false;
+        }
+        // OQAP does not count towards the normal read port restrictions
+        continue;
+      }
        if (Vector[Src.second][j] < 0)
          Vector[Src.second][j] = Src.first;
        if (Vector[Src.second][j] != Src.first)
-        return false;
+        return i;
      }
    }
-  return true;
+  // Now check Trans Alu
+  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
+    const std::pair<int, unsigned> &Src = TransSrcs[i];
+    unsigned Cycle = getTransSwizzle(TransSwz, i);
+    if (Src.first < 0)
+      continue;
+    if (Src.first == 255)
+      continue;
+    if (Vector[Src.second][Cycle] < 0)
+      Vector[Src.second][Cycle] = Src.first;
+    if (Vector[Src.second][Cycle] != Src.first)
+      return IGSrcs.size() - 1;
+  }
+  return IGSrcs.size();
  }
  
-static bool recursiveFitsFPLimitation(
-const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
-std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
-unsigned Depth = 0) {
-  if (!isLegal(IGSrcs, SwzCandidate, Depth))
+/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
+/// (in lexicographic term) swizzle sequence assuming that all swizzles after
+/// Idx can be skipped
+static bool
+NextPossibleSolution(
+    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+    unsigned Idx) {
+  assert(Idx < SwzCandidate.size());
+  int ResetIdx = Idx;
+  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
+    ResetIdx --;
+  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
+    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
+  }
+  if (ResetIdx == -1)
      return false;
-  if (IGSrcs.size() == Depth)
-    return true;
-  unsigned i = SwzCandidate[Depth];
-  for (; i < 6; i++) {
-    SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;
-    if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))
+  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
+  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
+  return true;
+}
+
+/// Enumerate all possible Swizzle sequence to find one that can meet all
+/// read port requirements.
+bool R600InstrInfo::FindSwizzleForVectorSlot(
+    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
+    const std::vector<std::pair<int, unsigned> > &TransSrcs,
+    R600InstrInfo::BankSwizzle TransSwz) const {
+  unsigned ValidUpTo = 0;
+  do {
+    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
+    if (ValidUpTo == IGSrcs.size())
        return true;
-  }
-  SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;
+  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
    return false;
  }
  
+/// Instructions in Trans slot can't read gpr at cycle 0 if they also read
+/// a const, and can't read a gpr at cycle 1 if they read 2 const.
+static bool
+isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
+                  const std::vector<std::pair<int, unsigned> > &TransOps,
+                  unsigned ConstCount) {
+  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
+    const std::pair<int, unsigned> &Src = TransOps[i];
+    unsigned Cycle = getTransSwizzle(TransSwz, i);
+    if (Src.first < 0)
+      continue;
+    if (ConstCount > 0 && Cycle == 0)
+      return false;
+    if (ConstCount > 1 && Cycle == 1)
+      return false;
+  }
+  return true;
+}
+
  bool
  R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
-                                      const DenseMap<unsigned, unsigned> &PV,
-                                      std::vector<BankSwizzle> &ValidSwizzle)
+                                       const DenseMap<unsigned, unsigned> &PV,
+                                       std::vector<BankSwizzle> &ValidSwizzle,
+                                       bool isLastAluTrans)
      const {
    //Todo : support shared src0 - src1 operand
  
    std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
    ValidSwizzle.clear();
+  unsigned ConstCount;
+  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
    for (unsigned i = 0, e = IG.size(); i < e; ++i) {
-    IGSrcs.push_back(ExtractSrcs(IG[i], PV));
+    IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
      unsigned Op = getOperandIdx(IG[i]->getOpcode(),
          AMDGPU::OpName::bank_swizzle);
      ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
          IG[i]->getOperand(Op).getImm());
    }
-  bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);
-  if (!Result)
-    return false;
-  return true;
+  std::vector<std::pair<int, unsigned> > TransOps;
+  if (!isLastAluTrans)
+    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
+
+  TransOps = IGSrcs.back();
+  IGSrcs.pop_back();
+  ValidSwizzle.pop_back();
+
+  static const R600InstrInfo::BankSwizzle TransSwz[] = {
+    ALU_VEC_012_SCL_210,
+    ALU_VEC_021_SCL_122,
+    ALU_VEC_120_SCL_212,
+    ALU_VEC_102_SCL_221
+  };
+  for (unsigned i = 0; i < 4; i++) {
+    TransBS = TransSwz[i];
+    if (!isConstCompatible(TransBS, TransOps, ConstCount))
+      continue;
+    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
+        TransBS);
+    if (Result) {
+      ValidSwizzle.push_back(TransBS);
+      return true;
+    }
+  }
+
+  return false;
  }
  
  
@@ -366,7 +511,8 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
  }
  
  bool
-R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
+R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
+    const {
    std::vector<unsigned> Consts;
    for (unsigned i = 0, n = MIs.size(); i < n; i++) {
      MachineInstr *MI = MIs[i];
@@ -503,6 +649,17 @@ int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
    };
  }
  
+static
+MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
+  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
+      It != E; ++It) {
+    if (It->getOpcode() == AMDGPU::CF_ALU ||
+        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
+      return llvm::prior(It.base());
+  }
+  return MBB.end();
+}
+
  unsigned
  R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                              MachineBasicBlock *TBB,
@@ -524,6 +681,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
        BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
               .addMBB(TBB)
               .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+      if (CfAlu == MBB.end())
+        return 1;
+      assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
+      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
        return 1;
      }
    } else {
@@ -535,6 +697,11 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
              .addMBB(TBB)
              .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
+    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+    if (CfAlu == MBB.end())
+      return 2;
+    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
+    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
      return 2;
    }
  }
@@ -558,6 +725,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      clearFlag(predSet, 0, MO_FLAG_PUSH);
      I->eraseFromParent();
+    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+    if (CfAlu == MBB.end())
+      break;
+    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
+    CfAlu->setDesc(get(AMDGPU::CF_ALU));
      break;
    }
    case AMDGPU::JUMP:
@@ -578,6 +750,11 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      clearFlag(predSet, 0, MO_FLAG_PUSH);
      I->eraseFromParent();
+    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
+    if (CfAlu == MBB.end())
+      break;
+    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
+    CfAlu->setDesc(get(AMDGPU::CF_ALU));
      break;
    }
    case AMDGPU::JUMP:
@@ -612,6 +789,15 @@ R600InstrInfo::isPredicable(MachineInstr *MI) const {
  
    if (MI->getOpcode() == AMDGPU::KILLGT) {
      return false;
+  } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
+    // If the clause start in the middle of MBB then the MBB has more
+    // than a single clause, unable to predicate several clauses.
+    if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
+      return false;
+    // TODO: We don't support KC merging atm
+    if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
+      return false;
+    return true;
    } else if (isVector(*MI)) {
      return false;
    } else {
@@ -707,6 +893,11 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                        const SmallVectorImpl<MachineOperand> &Pred) const {
    int PIdx = MI->findFirstPredOperandIdx();
  
+  if (MI->getOpcode() == AMDGPU::CF_ALU) {
+    MI->getOperand(8).setImm(0);
+    return true;
+  }
+
    if (PIdx != -1) {
      MachineOperand &PMO = MI->getOperand(PIdx);
      PMO.setReg(Pred[2].getReg());