//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sched-instrs"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"

using namespace llvm;
static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
    cl::ZeroOrMore, cl::init(false),
    cl::desc("Enable use of AA during MI DAG construction"));
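// Illustrative usage (a sketch; assumes the usual way cl::opts reach a tool
// such as llc that runs the machine scheduler):
//   llc -enable-misched -enable-aa-sched-mi test.ll
// With the flag set, the chain-dependence code below queries AliasAnalysis
// instead of conservatively chaining all possibly aliasing memory references.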
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt,
                                     bool IsPostRAFlag,
                                     LiveIntervals *lis)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
    IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) {
  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  DbgValues.clear();
  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
         "Virtual registers must be removed prior to PostRA scheduling");

  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  SchedModel.init(*ST.getSchedModel(), &ST, TII);
}
/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant or a multiplied value, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (1);
}
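// Illustrative example (hypothetical IR, not from the source): given
//   %i = ptrtoint i8* %p to i64
//   %j = add i64 %i, 16
//   %q = inttoptr i64 %j to i8*
// the walk looks through the add (a constant offset cannot produce the base),
// hits the ptrtoint, and hands %p back to the pointer-based walk below.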
/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
  // First just call Value::getUnderlyingObject to let it do what it does.
  do {
    V = GetUnderlyingObject(V);
    // If it found an inttoptr, use special code to continue climbing.
    if (Operator::getOpcode(V) != Instruction::IntToPtr)
      break;
    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
    // If that succeeded in finding a pointer, continue the search.
    if (!O->getType()->isPointerTy())
      break;
    V = O;
  } while (1);
  return V;
}
/// getUnderlyingObjectForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
                                                const MachineFrameInfo *MFI,
                                                bool &MayAlias) {
  if (!MI->hasOneMemOperand() ||
      !(*MI->memoperands_begin())->getValue() ||
      (*MI->memoperands_begin())->isVolatile())
    return 0;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return 0;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return 0;

    MayAlias = PSV->mayAlias(MFI);
    return V;
  }

  if (isIdentifiedObject(V))
    return V;

  return 0;
}
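// Illustrative cases (a sketch of what isIdentifiedObject accepts): a load
// from an alloca or a global variable yields that alloca/global here, which
// feeds the precise per-Value maps in buildSchedGraph below; a load through
// an arbitrary pointer argument returns null and is handled conservatively.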
void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
  BB = bb;
}

void ScheduleDAGInstrs::finishBlock() {
  // Subclasses should no longer refer to the old block.
  BB = 0;
}
/// Initialize the map with the number of registers.
void Reg2SUnitsMap::setRegLimit(unsigned Limit) {
  PhysRegSet.setUniverse(Limit);
  SUnits.resize(Limit);
}

/// Clear the map without deallocating storage.
void Reg2SUnitsMap::clear() {
  for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
    SUnits[*I].clear();
  }
  PhysRegSet.clear();
}
/// Initialize the DAG and common scheduler state for the current scheduling
/// region. This does not actually create the DAG, only clears it. The
/// scheduling driver may call BuildSchedGraph multiple times per scheduling
/// region.
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned endcount) {
  assert(bb == BB && "startBlock should set BB");
  RegionBegin = begin;
  RegionEnd = end;
  EndIndex = endcount;
  MISUnitMap.clear();

  ScheduleDAG::clearDAG();
}
/// Close the current scheduling region. Don't clear any state in case the
/// driver wants to refer to the previous scheduling region.
void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}
/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch or when the liveout registers
/// are used by instructions in the fallthrough block.
void ScheduleDAGInstrs::addSchedBarrierDeps() {
  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
  ExitSU.setInstr(ExitMI);
  bool AllDepKnown = ExitMI &&
    (ExitMI->isCall() || ExitMI->isBarrier());
  if (ExitMI && AllDepKnown) {
    // If it's a call or a barrier, add dependencies on the defs and uses of
    // the instruction.
    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = ExitMI->getOperand(i);
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
      else {
        assert(!IsPostRA && "Virtual register encountered after regalloc.");
        addVRegUseDeps(&ExitSU, i);
      }
    }
  } else {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    assert(Uses.empty() && "Uses in set before adding deps?");
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (!Uses.contains(Reg))
          Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
      }
  }
}
/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();

  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    std::vector<PhysRegSUOper> &UseList = Uses[*Alias];
    for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
      SUnit *UseSU = UseList[i].SU;
      if (UseSU == SU)
        continue;

      SDep dep(SU, SDep::Data, 1, *Alias);

      // Adjust the dependence latency using operand def/use information,
      // then allow the target to perform its own adjustments.
      int UseOp = UseList[i].OpIdx;
      MachineInstr *RegUse = UseOp < 0 ? 0 : UseSU->getInstr();
      dep.setLatency(
        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
                                         RegUse, UseOp, /*FindMin=*/false));
      dep.setMinLatency(
        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
                                         RegUse, UseOp, /*FindMin=*/true));

      ST.adjustSchedDependency(SU, UseSU, dep);
      UseSU->addPred(dep);
    }
  }
}
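// Illustrative numbers (assumed, not from a real scheduling model): if the
// def above is a load with a 4-cycle latency, computeOperandLatency with
// FindMin=false returns 4 and the data edge keeps the use at least 4 cycles
// behind the load, while FindMin=true returns the smaller lower bound that
// strictly in-order targets may honor instead.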
/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
/// this SUnit to following instructions in the same scheduling region that
/// depend on the physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  const MachineOperand &MO = MI->getOperand(OperIdx);

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  // there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Defs.contains(*Alias))
      continue;
    std::vector<PhysRegSUOper> &DefList = Defs[*Alias];
    for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
      SUnit *DefSU = DefList[i].SU;
      if (DefSU == &ExitSU)
        continue;
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() ||
           !DefSU->getInstr()->registerDefIsDead(*Alias))) {
        if (Kind == SDep::Anti)
          DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
        else {
          unsigned AOLat =
            SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
          DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
        }
      }
    }
  }

  if (!MO.isDef()) {
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx));
  }
  else {
    addPhysRegDataDeps(SU, OperIdx);

    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's defs.
    std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()];

    // clear this register's use list
    if (Uses.contains(MO.getReg()))
      Uses[MO.getReg()].clear();

    if (!MO.isDead())
      DefList.clear();

    // Calls will not be reordered because of chain dependencies (see
    // below). Since call operands are dead, calls may continue to be added
    // to the DefList making dependence checking quadratic in the size of
    // the block. Instead, we leave only one call at the back of the
    // DefList.
    if (SU->isCall) {
      while (!DefList.empty() && DefList.back().SU->isCall)
        DefList.pop_back();
    }
    // Defs are pushed in the order they are visited and never reordered.
    DefList.push_back(PhysRegSUOper(SU, OperIdx));
  }
}
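// Illustrative example (hypothetical physreg sequence; the block is walked
// bottom-up, so SU(2) is visited first):
//   SU(0): %EAX<def> = ...
//   SU(1):           = use %EAX
//   SU(2): %EAX<def> = ...
// SU(2) receives an anti edge from SU(1) (latency 0) and an output edge from
// SU(0), and SU(1) receives a data edge from SU(0), so the redefinition of
// EAX can never be scheduled above the earlier use or def.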
/// addVRegDefDeps - Add register output and data dependencies from this SUnit
/// to instructions that occur later in the same scheduling region if they read
/// from or write to the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  const MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Singly defined vregs do not have output/anti dependencies.
  // The current operand is a def, so we have at least one.
  // Check here if there are any others...
  if (MRI.hasOneDef(Reg))
    return;

  // Add output dependence to the next nearest def of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
  if (DefI == VRegDefs.end())
    VRegDefs.insert(VReg2SUnit(Reg, SU));
  else {
    SUnit *DefSU = DefI->SU;
    if (DefSU != SU && DefSU != &ExitSU) {
      unsigned OutLatency =
        SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
      DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
    }
    DefI->SU = SU;
  }
}
/// addVRegUseDeps - Add a register data dependency if the instruction that
/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
/// register antidependency from this SUnit to instructions that occur later in
/// the same scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  unsigned Reg = MI->getOperand(OperIdx).getReg();

  // Look up this operand's reaching definition.
  assert(LIS && "vreg dependencies require LiveIntervals");
  LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI));
  VNInfo *VNI = LRQ.valueIn();

  // VNI will be valid because MachineOperand::readsReg() is checked by caller.
  assert(VNI && "No value to read by operand");
  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
  // Phis and other noninstructions (after coalescing) have a NULL Def.
  if (Def) {
    SUnit *DefSU = getSUnit(Def);
    if (DefSU) {
      // The reaching Def lives within this scheduling region.
      // Create a data dependence.
      SDep dep(DefSU, SDep::Data, 1, Reg);
      // Adjust the dependence latency using operand def/use information, then
      // allow the target to perform its own adjustments.
      int DefOp = Def->findRegisterDefOperandIdx(Reg);
      dep.setLatency(
        SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false));
      dep.setMinLatency(
        SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true));

      const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
      ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
      DefSU->addPred(dep);
    }
  }

  // Add antidependence to the following def of the vreg it uses.
  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
  if (DefI != VRegDefs.end() && DefI->SU != SU)
    DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
}
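// Illustrative example (hypothetical multiply-defined vreg, pre-coalescing):
//   SU(0): %vreg1<def> = ...
//   SU(1):             = use %vreg1
//   SU(2): %vreg1<def> = ...
// SU(1) gets its data edge from SU(0), the reaching def found through the
// LiveIntervals query above, and SU(2) gets a zero-latency anti edge from
// SU(1) via VRegDefs, keeping the second def below the use.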
/// Return true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
  if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
      (MI->hasOrderedMemoryRef() &&
       (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
    return true;
  return false;
}
// This MI might have either incomplete info, or be known to be unsafe
// to deal with (i.e. a volatile object).
static inline bool isUnsafeMemoryObject(MachineInstr *MI,
                                        const MachineFrameInfo *MFI) {
  if (!MI || MI->memoperands_empty())
    return true;
  // We purposefully do not check for hasOneMemOperand() here
  // in hope to trigger an assert downstream in order to
  // finish implementation.
  if ((*MI->memoperands_begin())->isVolatile() ||
      MI->hasUnmodeledSideEffects())
    return true;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return true;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // Similarly to getUnderlyingObjectForInstr:
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return true;
  }
  // Does this pointer refer to a distinct and identifiable object?
  if (!isIdentifiedObject(V))
    return true;

  return false;
}
/// This returns true if the two MIs need a chain edge between them.
/// If these are not even memory operations, we still may need
/// chain deps between them. The question really is - could
/// these two MIs be reordered during scheduling from a memory dependency
/// point of view?
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                             MachineInstr *MIa,
                             MachineInstr *MIb) {
  // Cover a trivial case - no edge is needed to itself.
  if (MIa == MIb)
    return false;

  if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
    return true;

  // If we are dealing with two "normal" loads, we do not need an edge
  // between them - they could be reordered.
  if (!MIa->mayStore() && !MIb->mayStore())
    return false;

  // To this point analysis is generic. From here on we do need AA.
  if (!AA)
    return true;

  MachineMemOperand *MMOa = *MIa->memoperands_begin();
  MachineMemOperand *MMOb = *MIb->memoperands_begin();

  // FIXME: Need to handle multiple memory operands to support all targets.
  if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
    llvm_unreachable("Multiple memory operands.");

  // The following interface to AA is fashioned after DAGCombiner::isAlias
  // and operates with MachineMemOperand offset with some important
  // assumptions:
  //   - LLVM fundamentally assumes flat address spaces.
  //   - MachineOperand offset can *only* result from legalization and
  //     cannot affect queries other than the trivial case of overlap
  //     checking.
  //   - These offsets never wrap and never step outside
  //     of allocated objects.
  //   - There should never be any negative offsets here.
  //
  // FIXME: Modify API to hide this math from "user"
  // FIXME: Even before we go to AA we can reason locally about some
  // memory objects. It can save compile time, and possibly catch some
  // corner cases not currently covered.

  assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
  assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");

  int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
  int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
  int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
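  // Worked example with assumed numbers (not from the source): for a 4-byte
  // access at offset 8 (MMOa) and a 4-byte access at offset 12 (MMOb),
  // MinOffset = 8, so Overlapa = 4 + 8 - 8 = 4 and Overlapb = 4 + 12 - 8 = 8.
  // Each access is effectively extended back to the common MinOffset before
  // the alias query below, conservatively covering both original ranges.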
  AliasAnalysis::AliasResult AAResult = AA->alias(
    AliasAnalysis::Location(MMOa->getValue(), Overlapa,
                            MMOa->getTBAAInfo()),
    AliasAnalysis::Location(MMOb->getValue(), Overlapb,
                            MMOb->getTBAAInfo()));

  return (AAResult != AliasAnalysis::NoAlias);
}
/// This recursive function iterates over chain deps of SUb looking for
/// the "latest" node that needs a chain edge to SUa.
static unsigned
iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                 SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
                 SmallPtrSet<const SUnit*, 16> &Visited) {
  if (!SUa || !SUb || SUb == ExitSU)
    return *Depth;

  // Remember visited nodes.
  if (!Visited.insert(SUb))
    return *Depth;
  // If there is _some_ dependency already in place, do not
  // descend any further.
  // TODO: Need to make sure that if that dependency got eliminated or ignored
  // for any reason in the future, we would not violate DAG topology.
  // Currently it does not happen, but makes an implicit assumption about
  // future implementation.
  //
  // Independently, if we encounter a node that is some sort of global
  // object (like a call) we already have a full set of dependencies to it
  // and we can stop descending.
  if (SUa->isSucc(SUb) ||
      isGlobalMemoryObject(AA, SUb->getInstr()))
    return *Depth;

  // If we do need an edge, or we have exceeded the depth budget,
  // add that edge to the predecessors chain of SUb,
  // and stop descending.
  if (*Depth > 200 ||
      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
    SUb->addPred(SDep(SUa, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                      /*isNormalMemory=*/true));
    return *Depth;
  }
  // Track current depth.
  (*Depth)++;
  // Iterate over chain dependencies only.
  for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
       I != E; ++I)
    if (I->isCtrl())
      iterateChainSucc(AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
  return *Depth;
}
/// This function assumes that "downward" from SU the tail/leaf of the
/// already constructed DAG exists. It iterates downward and checks whether
/// SU can be aliasing any node dominated by it.
static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                            SUnit *SU, SUnit *ExitSU,
                            std::set<SUnit *> &CheckList,
                            unsigned LatencyToLoad) {
  if (!SU)
    return;

  SmallPtrSet<const SUnit*, 16> Visited;
  unsigned Depth = 0;

  for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
       I != IE; ++I) {
    if (SU == *I)
      continue;
    if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
      unsigned Latency = ((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0;
      (*I)->addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0,
                         /*isNormalMemory=*/true));
    }
    // Now go through all the chain successors and iterate from them.
    // Keep track of visited nodes.
    for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
           JE = (*I)->Succs.end(); J != JE; ++J)
      if (J->isCtrl())
        iterateChainSucc(AA, MFI, SU, J->getSUnit(),
                         ExitSU, &Depth, Visited);
  }
}
/// Check whether two objects need a chain edge; if so, add it,
/// otherwise remember the rejected SU.
static inline
void addChainDependency(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                        SUnit *SUa, SUnit *SUb,
                        std::set<SUnit *> &RejectList,
                        unsigned TrueMemOrderLatency = 0,
                        bool isNormalMemory = false) {
  // If this is a false dependency,
  // do not add the edge, but remember the rejected node.
  if (!EnableAASchedMI ||
      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr()))
    SUb->addPred(SDep(SUa, SDep::Order, TrueMemOrderLatency, /*Reg=*/0,
                      isNormalMemory));
  else {
    // Duplicate entries should be ignored.
    RejectList.insert(SUb);
    DEBUG(dbgs() << "\tReject chain dep between SU("
          << SUa->NodeNum << ") and SU("
          << SUb->NodeNum << ")\n");
  }
}
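// Illustrative effect (hypothetical IR, assuming -enable-aa-sched-mi): for
//   store i32 %x, i32* @A
//   store i32 %y, i32* @B
// AA can prove the two distinct globals do not alias, MIsNeedChainEdge
// returns false, no chain edge is added, and the stores may be reordered;
// without the flag a conservative ordering edge is always added.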
/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. An instruction order A < B implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
/// instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(BB->size());

  for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
    MachineInstr *MI = I;
    if (MI->isDebugValue())
      continue;

    SUnit *SU = newSUnit(MI);
    MISUnitMap[MI] = SU;

    SU->isCall = MI->isCall();
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
  }
}
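// Illustrative consequence (a sketch): for a region with instructions
// I0, I1, I2, the units are SU(0), SU(1), SU(2) in the same order, so every
// dependence edge runs from a lower NodeNum to a higher one, and NodeNum can
// double as the original program order when a scheduler breaks ties.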
/// If RegPressure is non-null, compute register pressure as a side effect. The
/// DAG builder is an efficient place to do it because it already visits
/// all the instructions.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
                                        RegPressureTracker *RPTracker) {
  // Create an SUnit for each real instruction.
  initSUnits();

  // We build scheduling units by walking a block's instruction list from
  // bottom to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = 0, *AliasChain = 0;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
  std::set<SUnit*> RejectMemNodes;

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValues.clear();
  FirstDbgValue = NULL;

  assert(Defs.empty() && Uses.empty() &&
         "Only BuildGraph should update Defs/Uses");
  Defs.setRegLimit(TRI->getNumRegs());
  Uses.setRegLimit(TRI->getNumRegs());

  assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
  // FIXME: Allow SparseSet to reserve space for the creation of virtual
  // registers during scheduling. Don't artificially inflate the Universe
  // because we want to assert that vregs are not created during DAG building.
  VRegDefs.setUniverse(MRI.getNumVirtRegs());

  // Model data dependencies between instructions being scheduled and the
  // ExitSU.
  addSchedBarrierDeps();

  // Walk the list of instructions, from bottom moving up.
  MachineInstr *PrevMI = NULL;
  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    if (MI && PrevMI) {
      DbgValues.push_back(std::make_pair(PrevMI, MI));
      PrevMI = NULL;
    }

    if (MI->isDebugValue()) {
      PrevMI = MI;
      continue;
    }
    if (RPTracker) {
      RPTracker->recede();
      assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
    }

    assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");

    SUnit *SU = MISUnitMap[MI];
    assert(SU && "No SUnit mapped to this MI");

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        addPhysRegDeps(SU, j);
      else {
        assert(!IsPostRA && "Virtual register encountered!");
        if (MO.isDef())
          addVRegDefDeps(SU, j);
        else if (MO.readsReg()) // ignore undef operands
          addVRegUseDeps(SU, j);
      }
    }
    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for a true dependency of a Store followed by
    // an aliased Load... we estimate that with a single cycle of latency
    // assuming the hardware will bypass).
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
    unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
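    // For example (assumed bypass model per the comment above): in
    //   ST [addr1], r1
    //   r2 = LD [addr2]    ; possibly aliasing addr1
    // the store->load chain edge added below carries TrueMemOrderLatency = 1
    // so the load is not issued in the store's cycle, while pure
    // memory-ordering edges (store->store) keep latency 0.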
    if (isGlobalMemoryObject(AA, MI)) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;
      // This is a barrier event that acts as a pivotal node in the DAG,
      // so it is safe to clear the list of exposed nodes.
      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
                      TrueMemOrderLatency);
      RejectMemNodes.clear();
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();

      // fall-through
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      if (AliasChain) {
        unsigned ChainLatency = 0;
        if (AliasChain->getInstr()->mayLoad())
          ChainLatency = TrueMemOrderLatency;
        addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes,
                           ChainLatency);
      }
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
                           TrueMemOrderLatency);
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
             E = AliasMemDefs.end(); I != E; ++I)
        addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
                             TrueMemOrderLatency);
      }
      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
                      TrueMemOrderLatency);
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (MI->mayStore()) {
      bool MayAlias = true;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          addChainDependency(AA, MFI, SU, I->second, RejectMemNodes,
                             0, true);
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes,
                               TrueMemOrderLatency, true);
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
                               TrueMemOrderLatency);
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
          // But we also should check dependent instructions for the
          // SU in question.
          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
                          TrueMemOrderLatency);
        }
        // Add dependence on barrier chain, if needed.
        // There is no point to check aliasing on barrier event. Even if
        // SU and barrier _could_ be reordered, they should not. In addition,
        // we have lost all RejectMemNodes below barrier.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
        goto new_alias_chain;
      }

      if (!ExitSU.isPred(SU))
        // Push stores up a bit to avoid them getting in between cmps
        // and branches.
        ExitSU.addPred(SDep(SU, SDep::Order, 0,
                            /*Reg=*/0, /*isNormalMemory=*/false,
                            /*isMustAlias=*/false,
                            /*isArtificial=*/true));
    } else if (MI->mayLoad()) {
      bool MayAlias = true;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);

          PendingLoads.push_back(SU);
          MayAlias = true;
        }
        if (MayAlias)
          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
  }
  if (PrevMI)
    FirstDbgValue = PrevMI;

  Defs.clear();
  Uses.clear();
  VRegDefs.clear();
  PendingLoads.clear();
}
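// Illustrative driver sequence (a sketch, not from this file; assumes a
// scheduler pass that owns a ScheduleDAGInstrs subclass, and the local names
// RegionBegin/RegionEnd/NumRegionInstrs are hypothetical):
//   DAG->startBlock(MBB);
//   DAG->enterRegion(MBB, RegionBegin, RegionEnd, NumRegionInstrs);
//   DAG->buildSchedGraph(AA, /*RPTracker=*/0); // SUnits plus all edges above
//   // ... reorder the region's instructions using the DAG ...
//   DAG->exitRegion();
//   DAG->finishBlock();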
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  SU->getInstr()->dump();
#endif
}
std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss);
  return oss.str();
}
/// Return the basic block label. It is not necessarily unique because a block
/// contains multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}