--- /dev/null
+//===--- HexagonSplitDouble.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
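+//
+// This pass splits 64-bit "double" virtual registers into pairs of 32-bit
+// virtual registers where that is expected to be profitable.  The double
+// registers are first partitioned into groups connected through splittable
+// instructions; each profitable partition is then rewritten in terms of the
+// 32-bit halves, and uses in instructions that cannot be split are rebuilt
+// with REG_SEQUENCE.
+//
+//===----------------------------------------------------------------------===//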
+
+#define DEBUG_TYPE "hsdr"
+
+#include "HexagonRegisterInfo.h"
+#include "HexagonTargetMachine.h"
+
+#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <climits>
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonSplitDoubleRegs();
+ void initializeHexagonSplitDoubleRegsPass(PassRegistry&);
+}
+
+namespace {
+ static cl::opt<int> MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1),
+ cl::desc("Maximum number of split partitions"));
+ static cl::opt<bool> MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true),
+ cl::desc("Do not split loads or stores"));
+
+ class HexagonSplitDoubleRegs : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonSplitDoubleRegs() : MachineFunctionPass(ID), TRI(nullptr),
+ TII(nullptr) {
+ initializeHexagonSplitDoubleRegsPass(*PassRegistry::getPassRegistry());
+ }
+ const char *getPassName() const override {
+ return "Hexagon Split Double Registers";
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ static const TargetRegisterClass *const DoubleRC;
+
+ const HexagonRegisterInfo *TRI;
+ const HexagonInstrInfo *TII;
+ const MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+
+ typedef std::set<unsigned> USet;
+ typedef std::map<unsigned,USet> UUSetMap;
+ typedef std::pair<unsigned,unsigned> UUPair;
+ typedef std::map<unsigned,UUPair> UUPairMap;
+ typedef std::map<const MachineLoop*,USet> LoopRegMap;
+
+ bool isInduction(unsigned Reg, LoopRegMap &IRM) const;
+ bool isVolatileInstr(const MachineInstr *MI) const;
+ bool isFixedInstr(const MachineInstr *MI) const;
+ void partitionRegisters(UUSetMap &P2Rs);
+ int32_t profit(const MachineInstr *MI) const;
+ bool isProfitable(const USet &Part, LoopRegMap &IRM) const;
+
+ void collectIndRegsForLoop(const MachineLoop *L, USet &Rs);
+ void collectIndRegs(LoopRegMap &IRM);
+
+ void createHalfInstr(unsigned Opc, MachineInstr *MI,
+ const UUPairMap &PairMap, unsigned SubR);
+ void splitMemRef(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitImmediate(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitCombine(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitExt(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitShift(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitAslOr(MachineInstr *MI, const UUPairMap &PairMap);
+ bool splitInstr(MachineInstr *MI, const UUPairMap &PairMap);
+ void replaceSubregUses(MachineInstr *MI, const UUPairMap &PairMap);
+ void collapseRegPairs(MachineInstr *MI, const UUPairMap &PairMap);
+ bool splitPartition(const USet &Part);
+
+ static int Counter;
+ static void dump_partition(raw_ostream&, const USet&,
+ const TargetRegisterInfo&);
+ };
+ char HexagonSplitDoubleRegs::ID;
+ int HexagonSplitDoubleRegs::Counter = 0;
+ const TargetRegisterClass *const HexagonSplitDoubleRegs::DoubleRC
+ = &Hexagon::DoubleRegsRegClass;
+}
+
+INITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double",
+ "Hexagon Split Double Registers", false, false)
+
+
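+// Gather the register-state flags of operand R into a mask suitable for
+// MachineInstrBuilder::addReg.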
+static inline uint32_t getRegState(const MachineOperand &R) {
+ assert(R.isReg());
+ return getDefRegState(R.isDef()) |
+ getImplRegState(R.isImplicit()) |
+ getKillRegState(R.isKill()) |
+ getDeadRegState(R.isDead()) |
+ getUndefRegState(R.isUndef()) |
+ getInternalReadRegState(R.isInternalRead()) |
+ (R.isDebug() ? RegState::Debug : 0);
+}
+
+
+void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os,
+ const USet &Part, const TargetRegisterInfo &TRI) {
+  os << '{';
+  for (auto I : Part)
+    os << ' ' << PrintReg(I, &TRI);
+  os << " }";
+}
+
+
+bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const {
+ for (auto I : IRM) {
+ const USet &Rs = I.second;
+ if (Rs.find(Reg) != Rs.end())
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const {
+ for (auto &I : MI->memoperands())
+ if (I->isVolatile())
+ return true;
+ return false;
+}
+
+
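+// Return true if MI must be left intact by this pass: either it is not one
+// of the instruction kinds handled here, or one of its register operands is
+// not a virtual register.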
+bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const {
+ if (MI->mayLoad() || MI->mayStore())
+ if (MemRefsFixed || isVolatileInstr(MI))
+ return true;
+ if (MI->isDebugValue())
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ return true;
+
+ case TargetOpcode::PHI:
+ case TargetOpcode::COPY:
+ break;
+
+ case Hexagon::L2_loadrd_io:
+    // Not handling stack loads (only reg-based addresses).
+ if (MI->getOperand(1).isReg())
+ break;
+ return true;
+ case Hexagon::S2_storerd_io:
+ // Not handling stack stores (only reg-based addresses).
+ if (MI->getOperand(0).isReg())
+ break;
+ return true;
+ case Hexagon::L2_loadrd_pi:
+ case Hexagon::S2_storerd_pi:
+
+ case Hexagon::A2_tfrpi:
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineir:
+ case Hexagon::A4_combineii:
+ case Hexagon::A4_combineri:
+ case Hexagon::A2_combinew:
+ case Hexagon::CONST64_Int_Real:
+
+ case Hexagon::A2_sxtw:
+
+ case Hexagon::A2_andp:
+ case Hexagon::A2_orp:
+ case Hexagon::A2_xorp:
+ case Hexagon::S2_asl_i_p_or:
+ case Hexagon::S2_asl_i_p:
+ case Hexagon::S2_asr_i_p:
+ case Hexagon::S2_lsr_i_p:
+ break;
+ }
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ return true;
+ }
+ return false;
+}
+
+
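+// Partition all 64-bit (double) virtual registers into groups of registers
+// that are connected to one another through splittable instructions.  The
+// resulting map takes a partition number to its set of registers; partition
+// #0 collects the registers that have to remain fixed.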
+void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
+ typedef std::map<unsigned,unsigned> UUMap;
+ typedef std::vector<unsigned> UVect;
+
+ unsigned NumRegs = MRI->getNumVirtRegs();
+ BitVector DoubleRegs(NumRegs);
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ unsigned R = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->getRegClass(R) == DoubleRC)
+ DoubleRegs.set(i);
+ }
+
+ BitVector FixedRegs(NumRegs);
+ for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
+ unsigned R = TargetRegisterInfo::index2VirtReg(x);
+ MachineInstr *DefI = MRI->getVRegDef(R);
+ // In some cases a register may exist, but never be defined or used.
+ // It should never appear anywhere, but mark it as "fixed", just to be
+ // safe.
+ if (!DefI || isFixedInstr(DefI))
+ FixedRegs.set(x);
+ }
+
+ UUSetMap AssocMap;
+ for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
+ if (FixedRegs[x])
+ continue;
+ unsigned R = TargetRegisterInfo::index2VirtReg(x);
+ DEBUG(dbgs() << PrintReg(R, TRI) << " ~~");
+ USet &Asc = AssocMap[R];
+ for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end();
+ U != Z; ++U) {
+ MachineOperand &Op = *U;
+ MachineInstr *UseI = Op.getParent();
+ if (isFixedInstr(UseI))
+ continue;
+ for (unsigned i = 0, n = UseI->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = UseI->getOperand(i);
+ // Skip non-registers or registers with subregisters.
+ if (&MO == &Op || !MO.isReg() || MO.getSubReg())
+ continue;
+ unsigned T = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(T)) {
+ FixedRegs.set(x);
+ continue;
+ }
+ if (MRI->getRegClass(T) != DoubleRC)
+ continue;
+ unsigned u = TargetRegisterInfo::virtReg2Index(T);
+ if (FixedRegs[u])
+ continue;
+ DEBUG(dbgs() << ' ' << PrintReg(T, TRI));
+ Asc.insert(T);
+ // Make it symmetric.
+ AssocMap[T].insert(R);
+ }
+ }
+ DEBUG(dbgs() << '\n');
+ }
+
+ UUMap R2P;
+ unsigned NextP = 1;
+ USet Visited;
+ for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
+ unsigned R = TargetRegisterInfo::index2VirtReg(x);
+ if (Visited.count(R))
+ continue;
+ // Create a new partition for R.
+ unsigned ThisP = FixedRegs[x] ? 0 : NextP++;
+ UVect WorkQ;
+ WorkQ.push_back(R);
+ for (unsigned i = 0; i < WorkQ.size(); ++i) {
+ unsigned T = WorkQ[i];
+ if (Visited.count(T))
+ continue;
+ R2P[T] = ThisP;
+ Visited.insert(T);
+ // Add all registers associated with T.
+ USet &Asc = AssocMap[T];
+ for (USet::iterator J = Asc.begin(), F = Asc.end(); J != F; ++J)
+ WorkQ.push_back(*J);
+ }
+ }
+
+ for (auto I : R2P)
+ P2Rs[I.second].insert(I.first);
+}
+
+
+static inline int32_t profitImm(unsigned Lo, unsigned Hi) {
+ int32_t P = 0;
+ bool LoZ1 = false, HiZ1 = false;
+ if (Lo == 0 || Lo == 0xFFFFFFFF)
+ P += 10, LoZ1 = true;
+ if (Hi == 0 || Hi == 0xFFFFFFFF)
+ P += 10, HiZ1 = true;
+ if (!LoZ1 && !HiZ1 && Lo == Hi)
+ P += 3;
+ return P;
+}
+
+
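+// Estimate the benefit of splitting the double-register operands of MI:
+// positive values favor splitting, negative values count against it.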
+int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
+ unsigned ImmX = 0;
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case TargetOpcode::PHI:
+ for (const auto &Op : MI->operands())
+ if (!Op.getSubReg())
+ return 0;
+ return 10;
+ case TargetOpcode::COPY:
+ if (MI->getOperand(1).getSubReg() != 0)
+ return 10;
+ return 0;
+
+ case Hexagon::L2_loadrd_io:
+ case Hexagon::S2_storerd_io:
+ return -1;
+ case Hexagon::L2_loadrd_pi:
+ case Hexagon::S2_storerd_pi:
+ return 2;
+
+ case Hexagon::A2_tfrpi:
+ case Hexagon::CONST64_Int_Real: {
+ uint64_t D = MI->getOperand(1).getImm();
+ unsigned Lo = D & 0xFFFFFFFFULL;
+ unsigned Hi = D >> 32;
+ return profitImm(Lo, Hi);
+ }
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ return profitImm(MI->getOperand(1).getImm(),
+ MI->getOperand(2).getImm());
+ case Hexagon::A4_combineri:
+ ImmX++;
+ case Hexagon::A4_combineir: {
+ ImmX++;
+ int64_t V = MI->getOperand(ImmX).getImm();
+ if (V == 0 || V == -1)
+ return 10;
+ // Fall through into A2_combinew.
+ }
+ case Hexagon::A2_combinew:
+ return 2;
+
+ case Hexagon::A2_sxtw:
+ return 3;
+
+ case Hexagon::A2_andp:
+ case Hexagon::A2_orp:
+ case Hexagon::A2_xorp:
+ return 1;
+
+ case Hexagon::S2_asl_i_p_or: {
+ unsigned S = MI->getOperand(3).getImm();
+ if (S == 0 || S == 32)
+ return 10;
+ return -1;
+ }
+ case Hexagon::S2_asl_i_p:
+ case Hexagon::S2_asr_i_p:
+ case Hexagon::S2_lsr_i_p:
+ unsigned S = MI->getOperand(2).getImm();
+ if (S == 0 || S == 32)
+ return 10;
+ if (S == 16)
+ return 5;
+ if (S == 48)
+ return 7;
+ return -10;
+ }
+
+ return 0;
+}
+
+
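+// Decide whether splitting the registers in Part is expected to pay off, by
+// summing the per-instruction profit over their defs and uses, and applying
+// penalties for induction registers, for rebuilding pairs in fixed users,
+// and for loop-header phis when fixed users are also present.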
+bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
+ const {
+ unsigned FixedNum = 0, SplitNum = 0, LoopPhiNum = 0;
+ int32_t TotalP = 0;
+
+ for (unsigned DR : Part) {
+ MachineInstr *DefI = MRI->getVRegDef(DR);
+ int32_t P = profit(DefI);
+ if (P == INT_MIN)
+ return false;
+ TotalP += P;
+ // Reduce the profitability of splitting induction registers.
+ if (isInduction(DR, IRM))
+ TotalP -= 30;
+
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
+ U != W; ++U) {
+ MachineInstr *UseI = U->getParent();
+ if (isFixedInstr(UseI)) {
+ FixedNum++;
+ // Calculate the cost of generating REG_SEQUENCE instructions.
+ for (auto &Op : UseI->operands()) {
+ if (Op.isReg() && Part.count(Op.getReg()))
+ if (Op.getSubReg())
+ TotalP -= 2;
+ }
+ continue;
+ }
+ // If a register from this partition is used in a fixed instruction,
+ // and there is also a register in this partition that is used in
+ // a loop phi node, then decrease the splitting profit as this can
+ // confuse the modulo scheduler.
+ if (UseI->isPHI()) {
+ const MachineBasicBlock *PB = UseI->getParent();
+ const MachineLoop *L = MLI->getLoopFor(PB);
+ if (L && L->getHeader() == PB)
+ LoopPhiNum++;
+ }
+ // Splittable instruction.
+ SplitNum++;
+ int32_t P = profit(UseI);
+ if (P == INT_MIN)
+ return false;
+ TotalP += P;
+ }
+ }
+
+ if (FixedNum > 0 && LoopPhiNum > 0)
+ TotalP -= 20*LoopPhiNum;
+
+ DEBUG(dbgs() << "Partition profit: " << TotalP << '\n');
+ return TotalP > 0;
+}
+
+
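+// Collect the double registers that are likely induction registers of the
+// loop L: values defined by phi nodes in the loop header that feed a 64-bit
+// add whose result is an input of the loop-controlling compare, together
+// with the double inputs of that compare.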
+void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
+ USet &Rs) {
+ const MachineBasicBlock *HB = L->getHeader();
+ const MachineBasicBlock *LB = L->getLoopLatch();
+ if (!HB || !LB)
+ return;
+
+ // Examine the latch branch. Expect it to be a conditional branch to
+ // the header (either "br-cond header" or "br-cond exit; br header").
+  MachineBasicBlock *TB = nullptr, *FB = nullptr;
+ MachineBasicBlock *TmpLB = const_cast<MachineBasicBlock*>(LB);
+ SmallVector<MachineOperand,2> Cond;
+ bool BadLB = TII->AnalyzeBranch(*TmpLB, TB, FB, Cond, false);
+ // Only analyzable conditional branches. HII::AnalyzeBranch will put
+ // the branch opcode as the first element of Cond, and the predicate
+ // operand as the second.
+ if (BadLB || Cond.size() != 2)
+ return;
+ // Only simple jump-conditional (with or without negation).
+ if (!TII->PredOpcodeHasJMP_c(Cond[0].getImm()))
+ return;
+ // Must go to the header.
+ if (TB != HB && FB != HB)
+ return;
+ assert(Cond[1].isReg() && "Unexpected Cond vector from AnalyzeBranch");
+ // Expect a predicate register.
+ unsigned PR = Cond[1].getReg();
+ assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass);
+
+ // Get the registers on which the loop controlling compare instruction
+ // depends.
+ unsigned CmpR1 = 0, CmpR2 = 0;
+ const MachineInstr *CmpI = MRI->getVRegDef(PR);
+ while (CmpI->getOpcode() == Hexagon::C2_not)
+ CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg());
+
+ int Mask = 0, Val = 0;
+ bool OkCI = TII->analyzeCompare(CmpI, CmpR1, CmpR2, Mask, Val);
+ if (!OkCI)
+ return;
+ // Eliminate non-double input registers.
+ if (CmpR1 && MRI->getRegClass(CmpR1) != DoubleRC)
+ CmpR1 = 0;
+ if (CmpR2 && MRI->getRegClass(CmpR2) != DoubleRC)
+ CmpR2 = 0;
+ if (!CmpR1 && !CmpR2)
+ return;
+
+  // Now examine the top of the loop: the phi nodes that could potentially
+  // define loop induction registers. The registers defined by such a phi
+  // node would be used in a 64-bit add, which then would be used in the
+  // loop compare instruction.
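+  //
+  // Illustrative sketch of the shape being looked for (register names are
+  // made up):
+  //   header:  %iv = PHI [%init, %preheader], [%iv.next, %latch] ; DoubleRegs
+  //            ...
+  //            %iv.next = A2_addp %iv, %step
+  //   latch:   %p = <cmp> %iv.next, ...
+  //            if (%p) jump %header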
+
+ // Get the set of all double registers defined by phi nodes in the
+ // loop header.
+ typedef std::vector<unsigned> UVect;
+ UVect DP;
+ for (auto &MI : *HB) {
+ if (!MI.isPHI())
+ break;
+ const MachineOperand &MD = MI.getOperand(0);
+ unsigned R = MD.getReg();
+ if (MRI->getRegClass(R) == DoubleRC)
+ DP.push_back(R);
+ }
+ if (DP.empty())
+ return;
+
+ auto NoIndOp = [this, CmpR1, CmpR2] (unsigned R) -> bool {
+ for (auto I = MRI->use_nodbg_begin(R), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ const MachineInstr *UseI = I->getParent();
+ if (UseI->getOpcode() != Hexagon::A2_addp)
+ continue;
+ // Get the output from the add. If it is one of the inputs to the
+      // loop-controlling compare instruction, then R is likely an
+      // induction register.
+ unsigned T = UseI->getOperand(0).getReg();
+ if (T == CmpR1 || T == CmpR2)
+ return false;
+ }
+ return true;
+ };
+ UVect::iterator End = std::remove_if(DP.begin(), DP.end(), NoIndOp);
+ Rs.insert(DP.begin(), End);
+  if (CmpR1)
+    Rs.insert(CmpR1);
+  if (CmpR2)
+    Rs.insert(CmpR2);
+
+ DEBUG({
+ dbgs() << "For loop at BB#" << HB->getNumber() << " ind regs: ";
+ dump_partition(dbgs(), Rs, *TRI);
+ dbgs() << '\n';
+ });
+}
+
+
+void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) {
+ typedef std::vector<MachineLoop*> LoopVector;
+ LoopVector WorkQ;
+
+ for (auto I : *MLI)
+ WorkQ.push_back(I);
+ for (unsigned i = 0; i < WorkQ.size(); ++i) {
+ for (auto I : *WorkQ[i])
+ WorkQ.push_back(I);
+ }
+
+ USet Rs;
+ for (unsigned i = 0, n = WorkQ.size(); i < n; ++i) {
+ MachineLoop *L = WorkQ[i];
+ Rs.clear();
+ collectIndRegsForLoop(L, Rs);
+ if (!Rs.empty())
+ IRM.insert(std::make_pair(L, Rs));
+ }
+}
+
+
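+// Build a copy of MI, with opcode Opc, that operates on a single 32-bit half
+// of every double register operand: registers present in PairMap are
+// replaced by the corresponding half register, and double registers not in
+// the map are accessed through the subregister index SubR.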
+void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI,
+ const UUPairMap &PairMap, unsigned SubR) {
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineInstr *NewI = BuildMI(B, MI, DL, TII->get(Opc));
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg()) {
+ NewI->addOperand(Op);
+ continue;
+ }
+ // For register operands, set the subregister.
+ unsigned R = Op.getReg();
+ unsigned SR = Op.getSubReg();
+ bool isVirtReg = TargetRegisterInfo::isVirtualRegister(R);
+ bool isKill = Op.isKill();
+ if (isVirtReg && MRI->getRegClass(R) == DoubleRC) {
+ isKill = false;
+ UUPairMap::const_iterator F = PairMap.find(R);
+ if (F == PairMap.end()) {
+ SR = SubR;
+ } else {
+ const UUPair &P = F->second;
+ R = (SubR == Hexagon::subreg_loreg) ? P.first : P.second;
+ SR = 0;
+ }
+ }
+ auto CO = MachineOperand::CreateReg(R, Op.isDef(), Op.isImplicit(), isKill,
+ Op.isDead(), Op.isUndef(), Op.isEarlyClobber(), SR, Op.isDebug(),
+ Op.isInternalRead());
+ NewI->addOperand(CO);
+ }
+}
+
+
+void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ bool Load = MI->mayLoad();
+ unsigned OrigOpc = MI->getOpcode();
+ bool PostInc = (OrigOpc == Hexagon::L2_loadrd_pi ||
+ OrigOpc == Hexagon::S2_storerd_pi);
+ MachineInstr *LowI, *HighI;
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Index of the base-address-register operand.
+ unsigned AdrX = PostInc ? (Load ? 2 : 1)
+ : (Load ? 1 : 0);
+ MachineOperand &AdrOp = MI->getOperand(AdrX);
+ unsigned RSA = getRegState(AdrOp);
+ MachineOperand &ValOp = Load ? MI->getOperand(0)
+ : (PostInc ? MI->getOperand(3)
+ : MI->getOperand(2));
+ UUPairMap::const_iterator F = PairMap.find(ValOp.getReg());
+ assert(F != PairMap.end());
+
+ if (Load) {
+ const UUPair &P = F->second;
+ int64_t Off = PostInc ? 0 : MI->getOperand(2).getImm();
+ LowI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.first)
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off);
+ HighI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.second)
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off+4);
+ } else {
+ const UUPair &P = F->second;
+ int64_t Off = PostInc ? 0 : MI->getOperand(1).getImm();
+ LowI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io))
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off)
+ .addReg(P.first);
+ HighI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io))
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off+4)
+ .addReg(P.second);
+ }
+
+ if (PostInc) {
+ // Create the increment of the address register.
+ int64_t Inc = Load ? MI->getOperand(3).getImm()
+ : MI->getOperand(2).getImm();
+ MachineOperand &UpdOp = Load ? MI->getOperand(1) : MI->getOperand(0);
+ const TargetRegisterClass *RC = MRI->getRegClass(UpdOp.getReg());
+ unsigned NewR = MRI->createVirtualRegister(RC);
+ assert(!UpdOp.getSubReg() && "Def operand with subreg");
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_addi), NewR)
+ .addReg(AdrOp.getReg(), RSA)
+ .addImm(Inc);
+ MRI->replaceRegWith(UpdOp.getReg(), NewR);
+ // The original instruction will be deleted later.
+ }
+
+ // Generate a new pair of memory-operands.
+ MachineFunction &MF = *B.getParent();
+ for (auto &MO : MI->memoperands()) {
+ const MachinePointerInfo &Ptr = MO->getPointerInfo();
+ unsigned F = MO->getFlags();
+ int A = MO->getAlignment();
+
+ auto *Tmp1 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, A);
+ LowI->addMemOperand(MF, Tmp1);
+ auto *Tmp2 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, std::min(A, 4));
+ HighI->addMemOperand(MF, Tmp2);
+ }
+}
+
+
+void HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg() && Op1.isImm());
+ uint64_t V = Op1.getImm();
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+
+ // The operand to A2_tfrsi can only have 32 significant bits. Immediate
+ // values in MachineOperand are stored as 64-bit integers, and so the
+ // value -1 may be represented either as 64-bit -1, or 4294967295. Both
+ // will have the 32 higher bits truncated in the end, but -1 will remain
+ // as -1, while the latter may appear to be a large unsigned value
+ // requiring a constant extender. The casting to int32_t will select the
+ // former representation. (The same reasoning applies to all 32-bit
+ // values.)
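+  // For example, V = 0xFFFFFFFF00000005 is emitted as
+  //   P.first  = A2_tfrsi #5
+  //   P.second = A2_tfrsi #-1   (rather than #4294967295)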
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first)
+ .addImm(int32_t(V & 0xFFFFFFFFULL));
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second)
+ .addImm(int32_t(V >> 32));
+}
+
+
+void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ assert(Op0.isReg());
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+
+ if (Op1.isImm()) {
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second)
+ .addImm(Op1.getImm());
+ } else if (Op1.isReg()) {
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.second)
+ .addReg(Op1.getReg(), getRegState(Op1), Op1.getSubReg());
+ } else
+ llvm_unreachable("Unexpected operand");
+
+ if (Op2.isImm()) {
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first)
+ .addImm(Op2.getImm());
+ } else if (Op2.isReg()) {
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first)
+ .addReg(Op2.getReg(), getRegState(Op2), Op2.getSubReg());
+ } else
+ llvm_unreachable("Unexpected operand");
+}
+
+
+void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg() && Op1.isReg());
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+ unsigned RS = getRegState(Op1);
+
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, Op1.getSubReg());
+ BuildMI(B, MI, DL, TII->get(Hexagon::S2_asr_i_r), P.second)
+ .addReg(Op1.getReg(), RS, Op1.getSubReg())
+ .addImm(31);
+}
+
+
+void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ assert(Op0.isReg() && Op1.isReg() && Op2.isImm());
+ int64_t Sh64 = Op2.getImm();
+ assert(Sh64 >= 0 && Sh64 < 64);
+ unsigned S = Sh64;
+
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+ unsigned LoR = P.first;
+ unsigned HiR = P.second;
+ using namespace Hexagon;
+
+ unsigned Opc = MI->getOpcode();
+ bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p);
+ bool Left = !Right;
+ bool Signed = (Opc == S2_asr_i_p);
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned RS = getRegState(Op1);
+ unsigned ShiftOpc = Left ? S2_asl_i_r
+ : (Signed ? S2_asr_i_r : S2_lsr_i_r);
+ unsigned LoSR = subreg_loreg;
+ unsigned HiSR = subreg_hireg;
+
+ if (S == 0) {
+ // No shift, subregister copy.
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), HiR)
+ .addReg(Op1.getReg(), RS, HiSR);
+ } else if (S < 32) {
+ const TargetRegisterClass *IntRC = &IntRegsRegClass;
+ unsigned TmpR = MRI->createVirtualRegister(IntRC);
+ // Expansion:
+ // Shift left: DR = shl R, #s
+ // LoR = shl R.lo, #s
+ // TmpR = extractu R.lo, #s, #32-s
+ // HiR = or (TmpR, asl(R.hi, #s))
+ // Shift right: DR = shr R, #s
+ // HiR = shr R.hi, #s
+ // TmpR = shr R.lo, #s
+ // LoR = insert TmpR, R.hi, #s, #32-s
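+    //
+    // For example, DR = shl R, #8 becomes:
+    //   LoR  = asl(R.lo, #8)
+    //   TmpR = extractu(R.lo, #8, #24)
+    //   HiR  = or(TmpR, asl(R.hi, #8))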
+
+ // Shift left:
+ // LoR = shl R.lo, #s
+ // Shift right:
+ // TmpR = shr R.lo, #s
+
+ // Make a special case for A2_aslh and A2_asrh (they are predicable as
+ // opposed to S2_asl_i_r/S2_asr_i_r).
+ if (S == 16 && Left)
+ BuildMI(B, MI, DL, TII->get(A2_aslh), LoR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ else if (S == 16 && Signed)
+ BuildMI(B, MI, DL, TII->get(A2_asrh), TmpR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ else
+ BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? LoR : TmpR))
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR)
+ .addImm(S);
+
+ if (Left) {
+ // TmpR = extractu R.lo, #s, #32-s
+ BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR)
+ .addImm(S)
+ .addImm(32-S);
+ // HiR = or (TmpR, asl(R.hi, #s))
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
+ .addReg(TmpR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(S);
+ } else {
+ // HiR = shr R.hi, #s
+ BuildMI(B, MI, DL, TII->get(ShiftOpc), HiR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR)
+ .addImm(S);
+ // LoR = insert TmpR, R.hi, #s, #32-s
+ BuildMI(B, MI, DL, TII->get(S2_insert), LoR)
+ .addReg(TmpR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(S)
+ .addImm(32-S);
+ }
+ } else if (S == 32) {
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), (Left ? HiR : LoR))
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR));
+ if (!Signed)
+ BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR))
+ .addImm(0);
+ else // Must be right shift.
+ BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(31);
+ } else if (S < 64) {
+ S -= 32;
+ if (S == 16 && Left)
+ BuildMI(B, MI, DL, TII->get(A2_aslh), HiR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ else if (S == 16 && Signed)
+ BuildMI(B, MI, DL, TII->get(A2_asrh), LoR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR);
+ else
+ BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? HiR : LoR))
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR))
+ .addImm(S);
+
+ if (Signed)
+ BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(31);
+ else
+ BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR))
+ .addImm(0);
+ }
+}
+
+
+void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op3 = MI->getOperand(3);
+ assert(Op0.isReg() && Op1.isReg() && Op2.isReg() && Op3.isImm());
+ int64_t Sh64 = Op3.getImm();
+ assert(Sh64 >= 0 && Sh64 < 64);
+ unsigned S = Sh64;
+
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+ unsigned LoR = P.first;
+ unsigned HiR = P.second;
+ using namespace Hexagon;
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned RS1 = getRegState(Op1);
+ unsigned RS2 = getRegState(Op2);
+ const TargetRegisterClass *IntRC = &IntRegsRegClass;
+
+ unsigned LoSR = subreg_loreg;
+ unsigned HiSR = subreg_hireg;
+
+ // Op0 = S2_asl_i_p_or Op1, Op2, Op3
+ // means: Op0 = or (Op1, asl(Op2, Op3))
+
+ // Expansion of
+ // DR = or (R1, asl(R2, #s))
+ //
+ // LoR = or (R1.lo, asl(R2.lo, #s))
+ // Tmp1 = extractu R2.lo, #s, #32-s
+ // Tmp2 = or R1.hi, Tmp1
+ // HiR = or (Tmp2, asl(R2.hi, #s))
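+  //
+  // For example, with #s = 8:
+  //   LoR  = or(R1.lo, asl(R2.lo, #8))
+  //   Tmp1 = extractu(R2.lo, #8, #24)
+  //   Tmp2 = or(R1.hi, Tmp1)
+  //   HiR  = or(Tmp2, asl(R2.hi, #8))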
+
+ if (S == 0) {
+ // DR = or (R1, asl(R2, #0))
+ // -> or (R1, R2)
+ // i.e. LoR = or R1.lo, R2.lo
+ // HiR = or R1.hi, R2.hi
+ BuildMI(B, MI, DL, TII->get(A2_or), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR)
+ .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(A2_or), HiR)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(Op2.getReg(), RS2, HiSR);
+ } else if (S < 32) {
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR)
+ .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR)
+ .addImm(S);
+ unsigned TmpR1 = MRI->createVirtualRegister(IntRC);
+ BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR1)
+ .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR)
+ .addImm(S)
+ .addImm(32-S);
+ unsigned TmpR2 = MRI->createVirtualRegister(IntRC);
+ BuildMI(B, MI, DL, TII->get(A2_or), TmpR2)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(TmpR1);
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
+ .addReg(TmpR2)
+ .addReg(Op2.getReg(), RS2, HiSR)
+ .addImm(S);
+ } else if (S == 32) {
+ // DR = or (R1, asl(R2, #32))
+ // -> or R1, R2.lo
+ // LoR = R1.lo
+ // HiR = or R1.hi, R2.lo
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(A2_or), HiR)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(Op2.getReg(), RS2, LoSR);
+ } else if (S < 64) {
+ // DR = or (R1, asl(R2, #s))
+ //
+ // LoR = R1:lo
+ // HiR = or (R1:hi, asl(R2:lo, #s-32))
+ S -= 32;
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(Op2.getReg(), RS2, LoSR)
+ .addImm(S);
+ }
+}
+
+
+bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ DEBUG(dbgs() << "Splitting: " << *MI);
+ bool Split = false;
+ unsigned Opc = MI->getOpcode();
+ using namespace Hexagon;
+
+ switch (Opc) {
+ case TargetOpcode::PHI:
+ case TargetOpcode::COPY: {
+ unsigned DstR = MI->getOperand(0).getReg();
+ if (MRI->getRegClass(DstR) == DoubleRC) {
+ createHalfInstr(Opc, MI, PairMap, subreg_loreg);
+ createHalfInstr(Opc, MI, PairMap, subreg_hireg);
+ Split = true;
+ }
+ break;
+ }
+ case A2_andp:
+ createHalfInstr(A2_and, MI, PairMap, subreg_loreg);
+ createHalfInstr(A2_and, MI, PairMap, subreg_hireg);
+ Split = true;
+ break;
+ case A2_orp:
+ createHalfInstr(A2_or, MI, PairMap, subreg_loreg);
+ createHalfInstr(A2_or, MI, PairMap, subreg_hireg);
+ Split = true;
+ break;
+ case A2_xorp:
+ createHalfInstr(A2_xor, MI, PairMap, subreg_loreg);
+ createHalfInstr(A2_xor, MI, PairMap, subreg_hireg);
+ Split = true;
+ break;
+
+ case L2_loadrd_io:
+ case L2_loadrd_pi:
+ case S2_storerd_io:
+ case S2_storerd_pi:
+ splitMemRef(MI, PairMap);
+ Split = true;
+ break;
+
+ case A2_tfrpi:
+ case CONST64_Int_Real:
+ splitImmediate(MI, PairMap);
+ Split = true;
+ break;
+
+ case A2_combineii:
+ case A4_combineir:
+ case A4_combineii:
+ case A4_combineri:
+ case A2_combinew:
+ splitCombine(MI, PairMap);
+ Split = true;
+ break;
+
+ case A2_sxtw:
+ splitExt(MI, PairMap);
+ Split = true;
+ break;
+
+ case S2_asl_i_p:
+ case S2_asr_i_p:
+ case S2_lsr_i_p:
+ splitShift(MI, PairMap);
+ Split = true;
+ break;
+
+ case S2_asl_i_p_or:
+ splitAslOr(MI, PairMap);
+ Split = true;
+ break;
+
+ default:
+ llvm_unreachable("Instruction not splitable");
+ return false;
+ }
+
+ return Split;
+}
+
+
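+// Rewrite uses of the form R:subreg_loreg / R:subreg_hireg, where R has been
+// split, to refer directly to the corresponding 32-bit half register.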
+void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || !Op.getSubReg())
+ continue;
+ unsigned R = Op.getReg();
+ UUPairMap::const_iterator F = PairMap.find(R);
+ if (F == PairMap.end())
+ continue;
+ const UUPair &P = F->second;
+ switch (Op.getSubReg()) {
+ case Hexagon::subreg_loreg:
+ Op.setReg(P.first);
+ break;
+ case Hexagon::subreg_hireg:
+ Op.setReg(P.second);
+ break;
+ }
+ Op.setSubReg(0);
+ }
+}
+
+
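+// For instructions that still need the full 64-bit value of a split register
+// (i.e. fixed instructions), rebuild the pair from its two halves with a
+// REG_SEQUENCE and rewrite the use to the rebuilt register.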
+void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg())
+ continue;
+ UUPairMap::const_iterator F = PairMap.find(R);
+ if (F == PairMap.end())
+ continue;
+ const UUPair &Pr = F->second;
+ unsigned NewDR = MRI->createVirtualRegister(DoubleRC);
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR)
+ .addReg(Pr.first)
+ .addImm(Hexagon::subreg_loreg)
+ .addReg(Pr.second)
+ .addImm(Hexagon::subreg_hireg);
+ Op.setReg(NewDR);
+ }
+}
+
+
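+// Split a single partition: create a pair of 32-bit registers for every
+// double register in it, rewrite the splittable defs and uses in terms of
+// the halves, replace remaining subregister uses, and let fixed users
+// rebuild the pairs with REG_SEQUENCE.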
+bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) {
+ const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
+ typedef std::set<MachineInstr*> MISet;
+ bool Changed = false;
+
+ DEBUG(dbgs() << "Splitting partition: "; dump_partition(dbgs(), Part, *TRI);
+ dbgs() << '\n');
+
+ UUPairMap PairMap;
+
+ MISet SplitIns;
+ for (unsigned DR : Part) {
+ MachineInstr *DefI = MRI->getVRegDef(DR);
+ SplitIns.insert(DefI);
+
+ // Collect all instructions, including fixed ones. We won't split them,
+ // but we need to visit them again to insert the REG_SEQUENCE instructions.
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
+ U != W; ++U)
+ SplitIns.insert(U->getParent());
+
+ unsigned LoR = MRI->createVirtualRegister(IntRC);
+ unsigned HiR = MRI->createVirtualRegister(IntRC);
+ DEBUG(dbgs() << "Created mapping: " << PrintReg(DR, TRI) << " -> "
+ << PrintReg(HiR, TRI) << ':' << PrintReg(LoR, TRI) << '\n');
+ PairMap.insert(std::make_pair(DR, UUPair(LoR, HiR)));
+ }
+
+ MISet Erase;
+ for (auto MI : SplitIns) {
+ if (isFixedInstr(MI)) {
+ collapseRegPairs(MI, PairMap);
+ } else {
+ bool Done = splitInstr(MI, PairMap);
+ if (Done)
+ Erase.insert(MI);
+ Changed |= Done;
+ }
+ }
+
+ for (unsigned DR : Part) {
+    // Before erasing the "double" instructions, revisit all uses of the
+    // double registers in this partition and replace any uses through
+    // subregisters with the corresponding single (32-bit) registers.
+ MISet Uses;
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
+ U != W; ++U)
+ Uses.insert(U->getParent());
+ for (auto M : Uses)
+ replaceSubregUses(M, PairMap);
+ }
+
+ for (auto MI : Erase) {
+ MachineBasicBlock *B = MI->getParent();
+ B->erase(MI);
+ }
+
+ return Changed;
+}
+
+
+bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Splitting double registers in function: "
+ << MF.getName() << '\n');
+
+ auto &ST = MF.getSubtarget<HexagonSubtarget>();
+ TRI = ST.getRegisterInfo();
+ TII = ST.getInstrInfo();
+ MRI = &MF.getRegInfo();
+ MLI = &getAnalysis<MachineLoopInfo>();
+
+ UUSetMap P2Rs;
+ LoopRegMap IRM;
+
+ collectIndRegs(IRM);
+ partitionRegisters(P2Rs);
+
+ DEBUG({
+ dbgs() << "Register partitioning: (partition #0 is fixed)\n";
+ for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) {
+ dbgs() << '#' << I->first << " -> ";
+ dump_partition(dbgs(), I->second, *TRI);
+ dbgs() << '\n';
+ }
+ });
+
+ bool Changed = false;
+ int Limit = MaxHSDR;
+
+ for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) {
+ if (I->first == 0)
+ continue;
+ if (Limit >= 0 && Counter >= Limit)
+ break;
+ USet &Part = I->second;
+ DEBUG(dbgs() << "Calculating profit for partition #" << I->first << '\n');
+ if (!isProfitable(Part, IRM))
+ continue;
+ Counter++;
+ Changed |= splitPartition(Part);
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createHexagonSplitDoubleRegs() {
+ return new HexagonSplitDoubleRegs();
+}
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon -O0 < %s | FileCheck %s
; CHECK-LABEL: @test00
-; CHECK: p0 = cmp.eq(r1:0, r3:2)
+; CHECK: = cmp.eq(r1:0, r3:2)
define i32 @test00(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.C2.cmpeqp(i64 %Rs, i64 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test01
-; CHECK: p0 = cmp.gt(r1:0, r3:2)
+; CHECK: = cmp.gt(r1:0, r3:2)
define i32 @test01(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.C2.cmpgtp(i64 %Rs, i64 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test02
-; CHECK: p0 = cmp.gtu(r1:0, r3:2)
+; CHECK: = cmp.gtu(r1:0, r3:2)
define i32 @test02(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.C2.cmpgtup(i64 %Rs, i64 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test10
-; CHECK: r0 = cmp.eq(r0, r1)
+; CHECK: = cmp.eq(r0, r1)
define i32 @test10(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpeq(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test11
-; CHECK: r0 = !cmp.eq(r0, r1)
+; CHECK: = !cmp.eq(r0, r1)
define i32 @test11(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpneq(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test12
-; CHECK: r0 = cmp.eq(r0, #23)
+; CHECK: = cmp.eq(r0, #23)
define i32 @test12(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpeqi(i32 %Rs, i32 23)
  ret i32 %0
}
; CHECK-LABEL: @test13
-; CHECK: r0 = !cmp.eq(r0, #47)
+; CHECK: = !cmp.eq(r0, #47)
define i32 @test13(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpneqi(i32 %Rs, i32 47)
  ret i32 %0
}
; CHECK-LABEL: @test20
-; CHECK: p0 = cmpb.eq(r0, r1)
+; CHECK: = cmpb.eq(r0, r1)
define i32 @test20(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbeq(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test21
-; CHECK: p0 = cmpb.gt(r0, r1)
+; CHECK: = cmpb.gt(r0, r1)
define i32 @test21(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgt(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test22
-; CHECK: p0 = cmpb.gtu(r0, r1)
+; CHECK: = cmpb.gtu(r0, r1)
define i32 @test22(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgtu(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test23
-; CHECK: p0 = cmpb.eq(r0, #56)
+; CHECK: = cmpb.eq(r0, #56)
define i32 @test23(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbeqi(i32 %Rs, i32 56)
  ret i32 %0
}
; CHECK-LABEL: @test24
-; CHECK: p0 = cmpb.gt(r0, #29)
+; CHECK: = cmpb.gt(r0, #29)
define i32 @test24(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgti(i32 %Rs, i32 29)
  ret i32 %0
}
; CHECK-LABEL: @test25
-; CHECK: p0 = cmpb.gtu(r0, #111)
+; CHECK: = cmpb.gtu(r0, #111)
define i32 @test25(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgtui(i32 %Rs, i32 111)
  ret i32 %0
}
; CHECK-LABEL: @test30
-; CHECK: p0 = cmph.eq(r0, r1)
+; CHECK: = cmph.eq(r0, r1)
define i32 @test30(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpheq(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test31
-; CHECK: p0 = cmph.gt(r0, r1)
+; CHECK: = cmph.gt(r0, r1)
define i32 @test31(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgt(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test32
-; CHECK: p0 = cmph.gtu(r0, r1)
+; CHECK: = cmph.gtu(r0, r1)
define i32 @test32(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgtu(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test33
-; CHECK: p0 = cmph.eq(r0, #-123)
+; CHECK: = cmph.eq(r0, #-123)
define i32 @test33(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpheqi(i32 %Rs, i32 -123)
  ret i32 %0
}
; CHECK-LABEL: @test34
-; CHECK: p0 = cmph.gt(r0, #-3)
+; CHECK: = cmph.gt(r0, #-3)
define i32 @test34(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgti(i32 %Rs, i32 -3)
  ret i32 %0
}
; CHECK-LABEL: @test35
-; CHECK: p0 = cmph.gtu(r0, #13)
+; CHECK: = cmph.gtu(r0, #13)
define i32 @test35(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgtui(i32 %Rs, i32 13)
  ret i32 %0
}
; CHECK-LABEL: @test40
-; CHECK: r1:0 = vmux(p0, r3:2, r5:4)
+; CHECK: = vmux(p0, r3:2, r5:4)
define i64 @test40(i32 %Pu, i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.C2.vmux(i32 %Pu, i64 %Rs, i64 %Rt)
  ret i64 %0
}
; CHECK-LABEL: @test41
-; CHECK: p0 = any8(vcmpb.eq(r1:0, r3:2))
+; CHECK: = any8(vcmpb.eq(r1:0, r3:2))
define i32 @test41(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.vcmpbeq.any(i64 %Rs, i64 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test50
-; CHECK: r1:0 = add(r1:0, r3:2)
+; CHECK: = add(r1:0, r3:2)
define i64 @test50(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.addp(i64 %Rs, i64 %Rt)
  ret i64 %0
}
; CHECK-LABEL: @test51
-; CHECK: r1:0 = add(r1:0, r3:2):sat
+; CHECK: = add(r1:0, r3:2):sat
define i64 @test51(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.addpsat(i64 %Rs, i64 %Rt)
  ret i64 %0
}
; CHECK-LABEL: @test52
-; CHECK: r1:0 = sub(r1:0, r3:2)
+; CHECK: = sub(r1:0, r3:2)
define i64 @test52(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.subp(i64 %Rs, i64 %Rt)
  ret i64 %0
}
; CHECK-LABEL: @test53
-; CHECK: r1:0 = add(r0, r3:2)
+; CHECK: = add(r0, r3:2)
define i64 @test53(i32 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.addsp(i32 %Rs, i64 %Rt)
  ret i64 %0
}
; CHECK-LABEL: @test54
-; CHECK: r1:0 = and(r1:0, r3:2)
+; CHECK: = and(r1:0, r3:2)
define i64 @test54(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.andp(i64 %Rs, i64 %Rt)
  ret i64 %0
}
; CHECK-LABEL: @test55
-; CHECK: r1:0 = or(r1:0, r3:2)
+; CHECK: = or(r1:0, r3:2)
define i64 @test55(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.orp(i64 %Rs, i64 %Rt)
  ret i64 %0
}
; CHECK-LABEL: @test56
-; CHECK: r1:0 = xor(r1:0, r3:2)
+; CHECK: = xor(r1:0, r3:2)
define i64 @test56(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.xorp(i64 %Rs, i64 %Rt)
  ret i64 %0
}
; CHECK-LABEL: @test57
-; CHECK: r1:0 = and(r1:0, ~r3:2)
+; CHECK: = and(r1:0, ~r3:2)
define i64 @test57(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A4.andnp(i64 %Rs, i64 %Rt)
  ret i64 %0
}
; CHECK-LABEL: @test58
-; CHECK: r1:0 = or(r1:0, ~r3:2)
+; CHECK: = or(r1:0, ~r3:2)
define i64 @test58(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A4.ornp(i64 %Rs, i64 %Rt)
  ret i64 %0
}
; CHECK-LABEL: @test60
-; CHECK: r0 = add(r0.l, r1.l)
+; CHECK: = add(r0.l, r1.l)
define i32 @test60(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.ll(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test61
-; CHECK: r0 = add(r0.l, r1.h)
+; CHECK: = add(r0.l, r1.h)
define i32 @test61(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.hl(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test62
-; CHECK: r0 = add(r0.l, r1.l):sat
+; CHECK: = add(r0.l, r1.l):sat
define i32 @test62(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test63
-; CHECK: r0 = add(r0.l, r1.h):sat
+; CHECK: = add(r0.l, r1.h):sat
define i32 @test63(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.hl(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test64
-; CHECK: r0 = add(r0.l, r1.l):<<16
+; CHECK: = add(r0.l, r1.l):<<16
define i32 @test64(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.ll(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test65
-; CHECK: r0 = add(r0.l, r1.h):<<16
+; CHECK: = add(r0.l, r1.h):<<16
define i32 @test65(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.lh(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test66
-; CHECK: r0 = add(r0.h, r1.l):<<16
+; CHECK: = add(r0.h, r1.l):<<16
define i32 @test66(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.hl(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test67
-; CHECK: r0 = add(r0.h, r1.h):<<16
+; CHECK: = add(r0.h, r1.h):<<16
define i32 @test67(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.hh(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test68
-; CHECK: r0 = add(r0.l, r1.l):sat:<<16
+; CHECK: = add(r0.l, r1.l):sat:<<16
define i32 @test68(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.ll(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test69
-; CHECK: r0 = add(r0.l, r1.h):sat:<<16
+; CHECK: = add(r0.l, r1.h):sat:<<16
define i32 @test69(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.lh(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test6A
-; CHECK: r0 = add(r0.h, r1.l):sat:<<16
+; CHECK: = add(r0.h, r1.l):sat:<<16
define i32 @test6A(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.hl(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test6B
-; CHECK: r0 = add(r0.h, r1.h):sat:<<16
+; CHECK: = add(r0.h, r1.h):sat:<<16
define i32 @test6B(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.hh(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test70
-; CHECK: r0 = sub(r0.l, r1.l)
+; CHECK: = sub(r0.l, r1.l)
define i32 @test70(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.ll(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test71
-; CHECK: r0 = sub(r0.l, r1.h)
+; CHECK: = sub(r0.l, r1.h)
define i32 @test71(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.hl(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test72
-; CHECK: r0 = sub(r0.l, r1.l):sat
+; CHECK: = sub(r0.l, r1.l):sat
define i32 @test72(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.sat.ll(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test73
-; CHECK: r0 = sub(r0.l, r1.h):sat
+; CHECK: = sub(r0.l, r1.h):sat
define i32 @test73(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.sat.hl(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test74
-; CHECK: r0 = sub(r0.l, r1.l):<<16
+; CHECK: = sub(r0.l, r1.l):<<16
define i32 @test74(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.ll(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test75
-; CHECK: r0 = sub(r0.l, r1.h):<<16
+; CHECK: = sub(r0.l, r1.h):<<16
define i32 @test75(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.lh(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test76
-; CHECK: r0 = sub(r0.h, r1.l):<<16
+; CHECK: = sub(r0.h, r1.l):<<16
define i32 @test76(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.hl(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test77
-; CHECK: r0 = sub(r0.h, r1.h):<<16
+; CHECK: = sub(r0.h, r1.h):<<16
define i32 @test77(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.hh(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test78
-; CHECK: r0 = sub(r0.l, r1.l):sat:<<16
+; CHECK: = sub(r0.l, r1.l):sat:<<16
define i32 @test78(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.ll(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test79
-; CHECK: r0 = sub(r0.l, r1.h):sat:<<16
+; CHECK: = sub(r0.l, r1.h):sat:<<16
define i32 @test79(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.lh(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test7A
-; CHECK: r0 = sub(r0.h, r1.l):sat:<<16
+; CHECK: = sub(r0.h, r1.l):sat:<<16
define i32 @test7A(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.hl(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test7B
-; CHECK: r0 = sub(r0.h, r1.h):sat:<<16
+; CHECK: = sub(r0.h, r1.h):sat:<<16
define i32 @test7B(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.hh(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test90
-; CHECK: r0 = and(#1, asl(r0, #2))
+; CHECK: = and(#1, asl(r0, #2))
define i32 @test90(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.andi.asl.ri(i32 1, i32 %Rs, i32 2)
  ret i32 %0
}
; CHECK-LABEL: @test91
-; CHECK: r0 = or(#1, asl(r0, #2))
+; CHECK: = or(#1, asl(r0, #2))
define i32 @test91(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.ori.asl.ri(i32 1, i32 %Rs, i32 2)
  ret i32 %0
}
; CHECK-LABEL: @test92
-; CHECK: r0 = add(#1, asl(r0, #2))
+; CHECK: = add(#1, asl(r0, #2))
define i32 @test92(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.addi.asl.ri(i32 1, i32 %Rs, i32 2)
  ret i32 %0
}
; CHECK-LABEL: @test93
-; CHECK: r0 = sub(#1, asl(r0, #2))
+; CHECK: = sub(#1, asl(r0, #2))
define i32 @test93(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.subi.asl.ri(i32 1, i32 %Rs, i32 2)
  ret i32 %0
}
; CHECK-LABEL: @test94
-; CHECK: r0 = and(#1, lsr(r0, #2))
+; CHECK: = and(#1, lsr(r0, #2))
define i32 @test94(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.andi.lsr.ri(i32 1, i32 %Rs, i32 2)
  ret i32 %0
}
; CHECK-LABEL: @test95
-; CHECK: r0 = or(#1, lsr(r0, #2))
+; CHECK: = or(#1, lsr(r0, #2))
define i32 @test95(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.ori.lsr.ri(i32 1, i32 %Rs, i32 2)
  ret i32 %0
}
; CHECK-LABEL: @test96
-; CHECK: r0 = add(#1, lsr(r0, #2))
+; CHECK: = add(#1, lsr(r0, #2))
define i32 @test96(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.addi.lsr.ri(i32 1, i32 %Rs, i32 2)
  ret i32 %0
}
; CHECK-LABEL: @test97
-; CHECK: r0 = sub(#1, lsr(r0, #2))
+; CHECK: = sub(#1, lsr(r0, #2))
define i32 @test97(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.subi.lsr.ri(i32 1, i32 %Rs, i32 2)
  ret i32 %0
}
; CHECK-LABEL: @test100
-; CHECK: r1:0 = bitsplit(r0, r1)
+; CHECK: = bitsplit(r0, r1)
define i64 @test100(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A4.bitsplit(i32 %Rs, i32 %Rt)
  ret i64 %0
}
; CHECK-LABEL: @test101
-; CHECK: r0 = modwrap(r0, r1)
+; CHECK: = modwrap(r0, r1)
define i32 @test101(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.modwrapu(i32 %Rs, i32 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test102
-; CHECK: r0 = parity(r1:0, r3:2)
+; CHECK: = parity(r1:0, r3:2)
define i32 @test102(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S2.parityp(i64 %Rs, i64 %Rt)
  ret i32 %0
}
; CHECK-LABEL: @test103
-; CHECK: r0 = parity(r0, r1)
+; CHECK: = parity(r0, r1)
define i32 @test103(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.parity(i32 %Rs, i32 %Rt)