+
+ /// CreateTargetHazardRecognizer - Allocate and return a hazard recognizer to
+ /// use for this target when scheduling the machine instructions before
+ /// register allocation.
+ virtual ScheduleHazardRecognizer*
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const = 0;
+
+ /// CreateTargetMIHazardRecognizer - Allocate and return a hazard recognizer
+ /// to use for this target when scheduling the machine instructions before
+ /// register allocation.
+ virtual ScheduleHazardRecognizer*
+ CreateTargetMIHazardRecognizer(const InstrItineraryData*,
+ const ScheduleDAG *DAG) const = 0;
+
+ /// CreateTargetPostRAHazardRecognizer - Allocate and return a hazard
+ /// recognizer to use for this target when scheduling the machine instructions
+ /// after register allocation.
+ virtual ScheduleHazardRecognizer*
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData*,
+ const ScheduleDAG *DAG) const = 0;
+
+ /// analyzeCompare - For a comparison instruction, return the source registers
+ /// in SrcReg and SrcReg2 if having two register operands, and the value it
+ /// compares against in CmpValue. Return true if the comparison instruction
+ /// can be analyzed.
+ virtual bool analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const {
+ return false;
+ }
+
+ /// optimizeCompareInstr - See if the comparison instruction can be converted
+ /// into something more efficient. E.g., on ARM most instructions can set the
+ /// flags register, obviating the need for a separate CMP.
+ virtual bool optimizeCompareInstr(MachineInstr *CmpInstr,
+ unsigned SrcReg, unsigned SrcReg2,
+ int Mask, int Value,
+ const MachineRegisterInfo *MRI) const {
+ return false;
+ }
+
+ /// optimizeLoadInstr - Try to remove the load by folding it to a register
+ /// operand at the use. We fold the load instructions if and only if the
+ /// def and use are in the same BB. We only look at one load and see
+ /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register
+ /// defined by the load we are trying to fold. DefMI returns the machine
+ /// instruction that defines FoldAsLoadDefReg, and the function returns
+ /// the machine instruction generated due to folding.
+ virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
+ const MachineRegisterInfo *MRI,
+ unsigned &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const {
+ return 0;
+ }
+
+ /// FoldImmediate - 'Reg' is known to be defined by a move immediate
+ /// instruction, try to fold the immediate into the use instruction.
+ virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const {
+ return false;
+ }
+
+ /// getNumMicroOps - Return the number of u-operations the given machine
+ /// instruction will be decoded to on the target cpu. The itinerary's
+ /// IssueWidth is the number of microops that can be dispatched each
+ /// cycle. An instruction with zero microops takes no dispatch resources.
+ virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr *MI) const = 0;
+
+ /// isZeroCost - Return true for pseudo instructions that don't consume any
+ /// machine resources in their current form. These are common cases that the
+ /// scheduler should consider free, rather than conservatively handling them
+ /// as instructions with no itinerary.
+ bool isZeroCost(unsigned Opcode) const {
+ return Opcode <= TargetOpcode::COPY;
+ }
+
+ virtual int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const = 0;
+
+ /// getOperandLatency - Compute and return the use operand latency of a given
+ /// pair of def and use.
+ /// In most cases, the static scheduling itinerary was enough to determine the
+ /// operand latency. But it may not be possible for instructions with variable
+ /// number of defs / uses.
+ ///
+ /// This is a raw interface to the itinerary that may be directly overriden by
+ /// a target. Use computeOperandLatency to get the best estimate of latency.
+ virtual int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const = 0;
+
+ /// computeOperandLatency - Compute and return the latency of the given data
+ /// dependent def and use when the operand indices are already known.
+ ///
+ /// FindMin may be set to get the minimum vs. expected latency.
+ unsigned computeOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx,
+ bool FindMin = false) const;
+
+ /// getInstrLatency - Compute the instruction latency of a given instruction.
+ /// If the instruction has higher cost when predicated, it's returned via
+ /// PredCost.
+ virtual unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = 0) const = 0;
+
+ virtual int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const = 0;
+
+ /// Return the default expected latency for a def based on it's opcode.
+ unsigned defaultDefLatency(const MCSchedModel *SchedModel,
+ const MachineInstr *DefMI) const;
+
+ int computeDefOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, bool FindMin) const;
+
+ /// isHighLatencyDef - Return true if this opcode has high latency to its
+ /// result.
+ virtual bool isHighLatencyDef(int opc) const { return false; }
+
+ /// hasHighOperandLatency - Compute operand latency between a def of 'Reg'
+ /// and an use in the current loop, return true if the target considered
+ /// it 'high'. This is used by optimization passes such as machine LICM to
+ /// determine whether it makes sense to hoist an instruction out even in
+ /// high register pressure situation.
+ virtual
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ return false;
+ }
+
+ /// hasLowDefLatency - Compute operand latency of a def of 'Reg', return true
+ /// if the target considered it 'low'.
+ virtual
+ bool hasLowDefLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx) const = 0;
+
+ /// verifyInstruction - Perform target specific instruction verification.
+ virtual
+ bool verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const {
+ return true;
+ }
+
+ /// getExecutionDomain - Return the current execution domain and bit mask of
+ /// possible domains for instruction.
+ ///
+ /// Some micro-architectures have multiple execution domains, and multiple
+ /// opcodes that perform the same operation in different domains. For
+ /// example, the x86 architecture provides the por, orps, and orpd
+ /// instructions that all do the same thing. There is a latency penalty if a
+ /// register is written in one domain and read in another.
+ ///
+ /// This function returns a pair (domain, mask) containing the execution
+ /// domain of MI, and a bit mask of possible domains. The setExecutionDomain
+ /// function can be used to change the opcode to one of the domains in the
+ /// bit mask. Instructions whose execution domain can't be changed should
+ /// return a 0 mask.
+ ///
+ /// The execution domain numbers don't have any special meaning except domain
+ /// 0 is used for instructions that are not associated with any interesting
+ /// execution domain.
+ ///
+ virtual std::pair<uint16_t, uint16_t>
+ getExecutionDomain(const MachineInstr *MI) const {
+ return std::make_pair(0, 0);
+ }
+
+ /// setExecutionDomain - Change the opcode of MI to execute in Domain.
+ ///
+ /// The bit (1 << Domain) must be set in the mask returned from
+ /// getExecutionDomain(MI).
+ ///
+ virtual void setExecutionDomain(MachineInstr *MI, unsigned Domain) const {}
+
+
+ /// getPartialRegUpdateClearance - Returns the preferred minimum clearance
+ /// before an instruction with an unwanted partial register update.
+ ///
+ /// Some instructions only write part of a register, and implicitly need to
+ /// read the other parts of the register. This may cause unwanted stalls
+ /// preventing otherwise unrelated instructions from executing in parallel in
+ /// an out-of-order CPU.
+ ///
+ /// For example, the x86 instruction cvtsi2ss writes its result to bits
+ /// [31:0] of the destination xmm register. Bits [127:32] are unaffected, so
+ /// the instruction needs to wait for the old value of the register to become
+ /// available:
+ ///
+ /// addps %xmm1, %xmm0
+ /// movaps %xmm0, (%rax)
+ /// cvtsi2ss %rbx, %xmm0
+ ///
+ /// In the code above, the cvtsi2ss instruction needs to wait for the addps
+ /// instruction before it can issue, even though the high bits of %xmm0
+ /// probably aren't needed.
+ ///
+ /// This hook returns the preferred clearance before MI, measured in
+ /// instructions. Other defs of MI's operand OpNum are avoided in the last N
+ /// instructions before MI. It should only return a positive value for
+ /// unwanted dependencies. If the old bits of the defined register have
+ /// useful values, or if MI is determined to otherwise read the dependency,
+ /// the hook should return 0.
+ ///
+ /// The unwanted dependency may be handled by:
+ ///
+ /// 1. Allocating the same register for an MI def and use. That makes the
+ /// unwanted dependency identical to a required dependency.
+ ///
+ /// 2. Allocating a register for the def that has no defs in the previous N
+ /// instructions.
+ ///
+ /// 3. Calling breakPartialRegDependency() with the same arguments. This
+ /// allows the target to insert a dependency breaking instruction.
+ ///
+ virtual unsigned
+ getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ // The default implementation returns 0 for no partial register dependency.
+ return 0;
+ }
+
+ /// breakPartialRegDependency - Insert a dependency-breaking instruction
+ /// before MI to eliminate an unwanted dependency on OpNum.
+ ///
+ /// If it wasn't possible to avoid a def in the last N instructions before MI
+ /// (see getPartialRegUpdateClearance), this hook will be called to break the
+ /// unwanted dependency.
+ ///
+ /// On x86, an xorps instruction can be used as a dependency breaker:
+ ///
+ /// addps %xmm1, %xmm0
+ /// movaps %xmm0, (%rax)
+ /// xorps %xmm0, %xmm0
+ /// cvtsi2ss %rbx, %xmm0
+ ///
+ /// An <imp-kill> operand should be added to MI if an instruction was
+ /// inserted. This ties the instructions together in the post-ra scheduler.
+ ///
+ virtual void
+ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {}
+
+ /// Create machine specific model for scheduling.
+ virtual DFAPacketizer*
+ CreateTargetScheduleState(const TargetMachine*, const ScheduleDAG*) const {
+ return NULL;
+ }
+
+private:
+ int CallFrameSetupOpcode, CallFrameDestroyOpcode;