//===-- PeepholeOptimizer.cpp - Peephole Optimizations -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Perform peephole optimizations on the machine code:
//
// - Optimize Extensions
//
//     Optimization of sign / zero extension instructions. It may be extended
//     to handle other instructions with similar properties.
//
//     On some targets, some instructions, e.g. X86 sign / zero extension, may
//     leave the source value in the lower part of the result. This
//     optimization will replace some uses of the pre-extension value with
//     uses of the sub-register of the result.
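//
//     For illustration only (generic pseudo machine code; the opcode and
//     sub-register names below are made up, not taken from a real target):
//
//       %1 = ZEXT32to64 %0        ; %0 is still available in %1:sub_32
//       ...
//       use %0
//     =>
//       %1 = ZEXT32to64 %0
//       %2 = COPY %1:sub_32
//       ...
//       use %2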
//
// - Optimize Comparisons
//
//     Optimization of comparison instructions. For instance, in this code:
//
//       sub r1, 1
//       cmp r1, 0
//       bz  L1
//
//     If the "sub" instruction already sets (or could be modified to set) the
//     same flag that the "cmp" instruction sets and that "bz" uses, then we
//     can eliminate the "cmp" instruction.
//
//     Another instance, in this code:
//
//       sub r1, r3 | sub r1, imm
//       cmp r3, r1 or cmp r1, r3 | cmp r1, imm
//       bge L1
//
//     If the branch instruction can use the flag from the "sub", then we can
//     replace the "sub" with "subs" and eliminate the "cmp" instruction.
//
// - Optimize Loads:
//
//     Loads that can be folded into a later instruction. A load is foldable
//     if it loads to virtual registers and the virtual register defined has
//     a single use.
//
// - Optimize Copies and Bitcasts:
//
//     Rewrite copies and bitcasts to avoid cross register bank copies
//     when possible.
//     E.g., consider the following example, where capital and lower-case
//     letters denote different register files:
//       b = copy A <-- cross-bank copy
//       C = copy b <-- cross-bank copy
//     =>
//       b = copy A <-- cross-bank copy
//       C = copy A <-- same-bank copy
//
//     E.g., for bitcasts:
//       b = bitcast A <-- cross-bank copy
//       C = bitcast b <-- cross-bank copy
//     =>
//       b = bitcast A <-- cross-bank copy
//       C = copy A    <-- same-bank copy
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;

#define DEBUG_TYPE "peephole-opt"

// Optimize Extensions
static cl::opt<bool>
Aggressive("aggressive-ext-opt", cl::Hidden,
           cl::desc("Aggressive extension optimization"));

static cl::opt<bool>
DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
                cl::desc("Disable the peephole optimizer"));

static cl::opt<bool>
DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(true),
                  cl::desc("Disable advanced copy optimization"));
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
STATISTIC(NumLoadFold, "Number of loads folded");
STATISTIC(NumSelects, "Number of selects optimized");
STATISTIC(NumCopiesBitcasts, "Number of copies/bitcasts optimized");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
  const TargetMachine *TM;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  MachineDominatorTree *DT; // Machine dominator tree

public:
  static char ID; // Pass identification
  PeepholeOptimizer() : MachineFunctionPass(ID) {
    initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
    if (Aggressive) {
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
    }
  }
private:
  bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
  bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                        SmallPtrSetImpl<MachineInstr*> &LocalMIs);
  bool optimizeSelect(MachineInstr *MI);
  bool optimizeCopyOrBitcast(MachineInstr *MI);
  bool isMoveImmediate(MachineInstr *MI,
                       SmallSet<unsigned, 4> &ImmDefRegs,
                       DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
  bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                     SmallSet<unsigned, 4> &ImmDefRegs,
                     DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
  bool isLoadFoldable(MachineInstr *MI,
                      SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
};
/// \brief Helper class to track the possible sources of a value defined by
/// a (chain of) copy related instructions.
/// Given a definition (instruction and definition index), this class
/// follows the use-def chain to find successive suitable sources.
/// The given source can be used to rewrite the definition into
/// def = COPY src.
///
/// For instance, let us consider the following snippet:
/// v0 =
/// v2 = INSERT_SUBREG v1, v0, sub0
/// def = COPY v2.sub0
///
/// Using a ValueTracker for def = COPY v2.sub0 will give the following
/// suitable sources:
/// v2.sub0 then v0.
/// Then, def can be rewritten into def = COPY v0.
class ValueTracker {
private:
  /// The current point into the use-def chain.
  const MachineInstr *Def;
  /// The index of the definition in Def.
  unsigned DefIdx;
  /// The sub register index of the definition.
  unsigned DefSubReg;
  /// The register where the value can be found.
  unsigned Reg;
  /// Specify whether or not the value tracking looks through
  /// complex instructions. When this is false, the value tracker
  /// bails on everything that is not a copy or a bitcast.
  ///
  /// Note: This could have been implemented as a specialized version of
  /// the ValueTracker class but that would have complicated the code of
  /// the users of this class.
  bool UseAdvancedTracking;
  /// Optional MachineRegisterInfo used to perform some complex
  /// checks.
  const MachineRegisterInfo *MRI;
  /// \brief Dispatcher to the right underlying implementation of
  /// getNextSource.
  bool getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg);
  /// \brief Specialized version of getNextSource for Copy instructions.
  bool getNextSourceFromCopy(unsigned &SrcIdx, unsigned &SrcSubReg);
  /// \brief Specialized version of getNextSource for Bitcast instructions.
  bool getNextSourceFromBitcast(unsigned &SrcIdx, unsigned &SrcSubReg);
  /// \brief Specialized version of getNextSource for RegSequence
  /// instructions.
  bool getNextSourceFromRegSequence(unsigned &SrcIdx, unsigned &SrcSubReg);
  /// \brief Specialized version of getNextSource for InsertSubreg
  /// instructions.
  bool getNextSourceFromInsertSubreg(unsigned &SrcIdx, unsigned &SrcSubReg);
  /// \brief Specialized version of getNextSource for ExtractSubreg
  /// instructions.
  bool getNextSourceFromExtractSubreg(unsigned &SrcIdx, unsigned &SrcSubReg);
  /// \brief Specialized version of getNextSource for SubregToReg
  /// instructions.
  bool getNextSourceFromSubregToReg(unsigned &SrcIdx, unsigned &SrcSubReg);
public:
  /// \brief Create a ValueTracker instance for the value defined by \p MI
  /// at the operand index \p DefIdx.
  /// \p DefSubReg represents the sub register index the value tracker will
  /// track. It does not need to match the sub register index used in \p MI.
  /// \p UseAdvancedTracking specifies whether or not the value tracker looks
  /// through complex instructions. By default (false), it handles only copy
  /// and bitcast instructions.
  /// \p MRI is useful to perform some complex checks.
  ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg,
               bool UseAdvancedTracking = false,
               const MachineRegisterInfo *MRI = nullptr)
      : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg),
        UseAdvancedTracking(UseAdvancedTracking), MRI(MRI) {
    assert(Def->getOperand(DefIdx).isDef() &&
           Def->getOperand(DefIdx).isReg() &&
           "Definition does not match machine instruction");
    // Initially the value is in the defined register.
    Reg = Def->getOperand(DefIdx).getReg();
  }
  /// \brief Following the use-def chain, get the next available source
  /// for the tracked value.
  /// When the returned value is not nullptr, getReg() gives the register
  /// that contains the tracked value.
  /// \note The sub register index returned in \p SrcSubReg must be used
  /// on the register returned by getReg() to access the actual value.
  /// \return Unless the returned value is nullptr (i.e., no source found),
  /// \p SrcIdx gives the index of the next source in the returned
  /// instruction and \p SrcSubReg the index to be used on that source to
  /// get the tracked value. When nullptr is returned, no alternative source
  /// has been found.
  const MachineInstr *getNextSource(unsigned &SrcIdx, unsigned &SrcSubReg);

  /// \brief Get the last register where the initial value can be found.
  /// Initially this is the register of the definition.
  /// Then, after each successful call to getNextSource, this is the
  /// register of the last source.
  unsigned getReg() const { return Reg; }
};
} // End anonymous namespace.
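
// A sketch of how a client typically drives a ValueTracker (this mirrors the
// loop in optimizeCopyOrBitcast below; the variable names are illustrative,
// not part of the original code):
//
//   ValueTracker VT(*MI, DefIdx, DefSubReg, /*UseAdvancedTracking=*/true, MRI);
//   unsigned SrcIdx, SrcSubReg;
//   while (VT.getNextSource(SrcIdx, SrcSubReg)) {
//     // VT.getReg() and SrcSubReg now describe where the tracked value lives;
//     // stop as soon as that location is suitable.
//   }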
char PeepholeOptimizer::ID = 0;
char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
                      "Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
                    "Peephole Optimizations", false, false)
/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of
/// the result.
///
/// Do not generate an EXTRACT that is used only in a debug use, as this
/// changes the code. Since this code does not currently share EXTRACTs, just
/// ignore all debug uses.
bool PeepholeOptimizer::
optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                 SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
  unsigned SrcReg, DstReg, SubIdx;
  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    return false;

  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  if (MRI->hasOneNonDBGUse(SrcReg))
    // No other uses.
    return false;

  // Ensure DstReg can get a register class that actually supports
  // sub-registers. Don't change the class until we commit.
  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
  DstRC = TM->getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg(
      DstRC, SubIdx);
  if (!DstRC)
    return false;

  // The ext instr may be operating on a sub-register of SrcReg as well.
  // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
  // source register.
  // If UseSrcSubIdx is set, SubIdx also applies to SrcReg, and only uses of
  // SrcReg:SubIdx should be replaced.
  bool UseSrcSubIdx =
      TM->getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg(
          MRI->getRegClass(SrcReg), SubIdx) != nullptr;
  // The source has other uses. See if we can replace the other uses with use
  // of the result of the extension.
  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
  for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
    ReachedBBs.insert(UI.getParent());

  // Uses that are in the same BB as the uses of the result of the instruction.
  SmallVector<MachineOperand*, 8> Uses;

  // Uses that the result of the instruction can reach.
  SmallVector<MachineOperand*, 8> ExtendedUses;

  bool ExtendLife = true;
  for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
    MachineInstr *UseMI = UseMO.getParent();
    if (UseMI == MI)
      continue;

    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // Only accept uses of SrcReg:SubIdx.
    if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
      continue;
    // It's an error to translate this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    //
    // into this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1027 = COPY %reg1025:4
    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    //
    // The problem here is that SUBREG_TO_REG is there to assert that an
    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    // the COPY here, it will give us the value after the <sext>, not the
    // original value of %reg1024 before <sext>.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      return false;
    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these unless it's a PHI.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in order
      // to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend live range
      // of the extension result.
      ExtendLife = false;
      break;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
              std::back_inserter(Uses));
  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;

    // Look for PHI uses of the extended result; we don't want to extend the
    // liveness of a PHI input. It breaks all kinds of assumptions downstream.
    // A PHI use is expected to be the kill of its source values.
    for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
      if (UI.isPHI())
        PHIBBs.insert(UI.getParent());
    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
      MachineOperand *UseMO = Uses[i];
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      if (PHIBBs.count(UseMBB))
        continue;

      // About to add uses of DstReg, clear DstReg's kill flags.
      if (!Changed) {
        MRI->clearKillFlags(DstReg);
        MRI->constrainRegClass(DstReg, DstRC);
      }

      unsigned NewVR = MRI->createVirtualRegister(RC);
      MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
                                   TII->get(TargetOpcode::COPY), NewVR)
          .addReg(DstReg, 0, SubIdx);
      // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
      if (UseSrcSubIdx) {
        Copy->getOperand(0).setSubReg(SubIdx);
        Copy->getOperand(0).setIsUndef();
      }
      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}
/// optimizeCmpInstr - If the instruction is a compare and the previous
/// instruction it's comparing against already sets (or could be modified to
/// set) the same flag as the compare, then we can remove the comparison and
/// use the flag from the previous instruction.
bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
                                         MachineBasicBlock *MBB) {
  // If this instruction is a comparison against zero and isn't comparing a
  // physical register, we can try to optimize it.
  unsigned SrcReg, SrcReg2;
  int CmpMask, CmpValue;
  if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
      (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
    return false;

  // Attempt to optimize the comparison instruction.
  if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
    ++NumCmps;
    return true;
  }

  return false;
}
/// Optimize a select instruction.
bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
  unsigned TrueOp = 0;
  unsigned FalseOp = 0;
  bool Optimizable = false;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
    return false;
  if (!Optimizable)
    return false;
  if (!TII->optimizeSelect(MI))
    return false;
  MI->eraseFromParent();
  ++NumSelects;
  return true;
}
/// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
/// share the same register file.
static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
                                  const TargetRegisterClass *DefRC,
                                  unsigned DefSubReg,
                                  const TargetRegisterClass *SrcRC,
                                  unsigned SrcSubReg) {
  // Same register class.
  if (DefRC == SrcRC)
    return true;

  // Both operands are sub registers. Check if they share a register class.
  unsigned SrcIdx, DefIdx;
  if (SrcSubReg && DefSubReg)
    return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
                                      SrcIdx, DefIdx) != nullptr;
  // At most one of the registers is a sub register; make it Src to avoid
  // duplicating the test.
  if (!SrcSubReg) {
    std::swap(DefSubReg, SrcSubReg);
    std::swap(DefRC, SrcRC);
  }

  // One of the registers is a sub register; check if we can get a superclass.
  if (SrcSubReg)
    return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr;

  return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
}
/// \brief Get the index of the definition and source for the \p Copy
/// instruction.
///
/// \pre Copy.isCopy() or Copy.isBitcast().
/// \return True if the Copy instruction has only one register source
/// and one register definition. Otherwise, \p DefIdx and \p SrcIdx
/// are invalid.
static bool getCopyOrBitcastDefUseIdx(const MachineInstr &Copy,
                                      unsigned &DefIdx, unsigned &SrcIdx) {
  assert((Copy.isCopy() || Copy.isBitcast()) && "Wrong operation type.");
  if (Copy.isCopy()) {
    // Copy instructions are supposed to be: Def = Src.
    if (Copy.getDesc().getNumOperands() != 2)
      return false;
    DefIdx = 0;
    SrcIdx = 1;
    assert(Copy.getOperand(DefIdx).isDef() && "Use comes before def!");
    return true;
  }

  // Bitcast case.
  // Bitcasts with more than one def are not supported.
  if (Copy.getDesc().getNumDefs() != 1)
    return false;
  // Initialize SrcIdx to an undefined operand.
  SrcIdx = Copy.getDesc().getNumOperands();
  for (unsigned OpIdx = 0, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; ++OpIdx) {
    const MachineOperand &MO = Copy.getOperand(OpIdx);
    if (!MO.isReg() || !MO.getReg())
      continue;
    if (MO.isDef())
      DefIdx = OpIdx;
    else if (SrcIdx != EndOpIdx)
      // Multiple sources?
      return false;
    else
      SrcIdx = OpIdx;
  }
  return true;
}
/// \brief Optimize a copy or bitcast instruction to avoid cross
/// register bank copies. The optimization looks through a chain of
/// copies and tries to find a source that has a compatible register
/// class.
/// Two register classes are considered to be compatible if they share
/// the same register bank.
/// New copies issued by this optimization are register allocator
/// friendly. This optimization does not remove any copy as it may
/// overconstrain the register allocator, but replaces some copies when
/// possible.
/// \pre \p MI is a Copy (MI->isCopy() is true) or a Bitcast
/// (MI->isBitcast() is true).
/// \return True, when \p MI has been optimized. In that case, \p MI has
/// been removed from its parent.
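///
/// For illustration only (the virtual register names below are made up, not
/// from the original comments), a chain like:
///   %v1 = COPY %v0   <-- cross-bank copy
///   %v2 = COPY %v1   <-- cross-bank copy (this is \p MI)
/// is rewritten by issuing a new same-bank copy
///   %v3 = COPY %v0
/// and replacing every use of %v2 with %v3; \p MI itself is then erased.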
bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) {
  unsigned DefIdx, SrcIdx;
  if (!MI || !getCopyOrBitcastDefUseIdx(*MI, DefIdx, SrcIdx))
    return false;

  const MachineOperand &MODef = MI->getOperand(DefIdx);
  assert(MODef.isReg() && "Copies must be between registers.");
  unsigned Def = MODef.getReg();

  if (TargetRegisterInfo::isPhysicalRegister(Def))
    return false;

  const TargetRegisterClass *DefRC = MRI->getRegClass(Def);
  unsigned DefSubReg = MODef.getSubReg();
  unsigned Src;
  unsigned SrcSubReg;
  bool ShouldRewrite = false;
  const TargetRegisterInfo &TRI = *TM->getSubtargetImpl()->getRegisterInfo();

  // Follow the chain of copies until we reach the top of the use-def chain
  // or find a more suitable source.
  ValueTracker ValTracker(*MI, DefIdx, DefSubReg, !DisableAdvCopyOpt, MRI);
  do {
    unsigned CopySrcIdx, CopySrcSubReg;
    if (!ValTracker.getNextSource(CopySrcIdx, CopySrcSubReg))
      break;
    Src = ValTracker.getReg();
    SrcSubReg = CopySrcSubReg;

    // Do not extend the live-ranges of physical registers as they add
    // constraints to the register allocator.
    // Moreover, if we wanted to extend the live-range of a physical register,
    // unlike an SSA virtual register, we would have to check that it is not
    // redefined before the related use.
    if (TargetRegisterInfo::isPhysicalRegister(Src))
      break;

    const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);

    // If this source does not incur a cross register bank copy, use it.
    ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC,
                                          SrcSubReg);
  } while (!ShouldRewrite);
  // If we did not find a more suitable source, there is nothing to optimize.
  if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg())
    return false;

  // Rewrite the copy to avoid a cross register bank penalty.
  unsigned NewVR = TargetRegisterInfo::isPhysicalRegister(Def) ? Def :
      MRI->createVirtualRegister(DefRC);
  MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
                                  TII->get(TargetOpcode::COPY), NewVR)
      .addReg(Src, 0, SrcSubReg);
  NewCopy->getOperand(0).setSubReg(DefSubReg);

  MRI->replaceRegWith(Def, NewVR);
  MRI->clearKillFlags(NewVR);
  // We extended the lifetime of Src.
  // Clear the kill flags to account for that.
  MRI->clearKillFlags(Src);
  MI->eraseFromParent();
  ++NumCopiesBitcasts;
  return true;
}
/// isLoadFoldable - Check whether MI is a candidate for folding into a later
/// instruction. We only fold loads to virtual registers, and the virtual
/// register defined must have a single use.
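///
/// For illustration only (x86-flavored, simplified operand lists; this is an
/// assumed typical case, not text from the original comments):
///   %1 = MOV32rm <mem>       ; single-use load, recorded as a candidate
///   %2 = ADD32rr %0, %1
/// can later be rewritten by the target's optimizeLoadInstr hook into:
///   %2 = ADD32rm %0, <mem>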
bool PeepholeOptimizer::isLoadFoldable(
    MachineInstr *MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
  if (!MI->canFoldAsLoad() || !MI->mayLoad())
    return false;
  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.getNumDefs() != 1)
    return false;

  unsigned Reg = MI->getOperand(0).getReg();
  // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
  // loads. It should be checked when processing uses of the load, since
  // uses can be removed during peephole.
  if (!MI->getOperand(0).getSubReg() &&
      TargetRegisterInfo::isVirtualRegister(Reg) &&
      MRI->hasOneNonDBGUse(Reg)) {
    FoldAsLoadDefCandidates.insert(Reg);
    return true;
  }
  return false;
}
bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
                                        SmallSet<unsigned, 4> &ImmDefRegs,
                                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
  const MCInstrDesc &MCID = MI->getDesc();
  if (!MI->isMoveImmediate())
    return false;
  if (MCID.getNumDefs() != 1)
    return false;
  unsigned Reg = MI->getOperand(0).getReg();
  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    ImmDefMIs.insert(std::make_pair(Reg, MI));
    ImmDefRegs.insert(Reg);
    return true;
  }

  return false;
}
/// foldImmediate - Try folding register operands that are defined by move
/// immediate instructions, i.e. a trivial constant folding optimization, if
/// and only if the def and use are in the same BB.
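///
/// For illustration only (generic pseudo machine code, not from the original
/// comments):
///   %1 = MOVi 42
///   %2 = ADDrr %0, %1
/// becomes, when the target's FoldImmediate hook succeeds:
///   %2 = ADDri %0, 42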
bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                                      SmallSet<unsigned, 4> &ImmDefRegs,
                                      DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    if (ImmDefRegs.count(Reg) == 0)
      continue;
    DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
    assert(II != ImmDefMIs.end());
    if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
      ++NumImmFold;
      return true;
    }
  }
  return false;
}
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (skipOptnoneFunction(*MF.getFunction()))
    return false;

  DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
  DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');

  if (DisablePeephole)
    return false;

  TM = &MF.getTarget();
  TII = TM->getSubtargetImpl()->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;

  bool Changed = false;

  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;
    SmallPtrSet<MachineInstr*, 16> LocalMIs;
    SmallSet<unsigned, 4> ImmDefRegs;
    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      // We may be erasing MI below, increment MII now.
      ++MII;
      LocalMIs.insert(MI);

      // Skip debug values. They should not affect this peephole optimization.
      if (MI->isDebugValue())
        continue;

      // If there exists an instruction which belongs to the following
      // categories, we will discard the load candidates.
      if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() ||
          MI->hasUnmodeledSideEffects()) {
        FoldAsLoadDefCandidates.clear();
        continue;
      }
      if (MI->mayStore() || MI->isCall())
        FoldAsLoadDefCandidates.clear();

      if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) ||
          (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
          (MI->isSelect() && optimizeSelect(MI))) {
        // MI is deleted.
        LocalMIs.erase(MI);
        Changed = true;
        continue;
      }
      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        // optimizeExtInstr might have created new instructions after MI
        // and before the already incremented MII. Adjust MII so that the
        // next iteration sees the new instructions.
        MII = MI;
        ++MII;
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }
      // Check whether MI is a load candidate for folding into a later
      // instruction. If MI is not a candidate, check whether we can fold an
      // earlier load into MI.
      if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
          !FoldAsLoadDefCandidates.empty()) {
        const MCInstrDesc &MIDesc = MI->getDesc();
        for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
             ++i) {
          const MachineOperand &MOp = MI->getOperand(i);
          if (!MOp.isReg())
            continue;
          unsigned FoldAsLoadDefReg = MOp.getReg();
          if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
            // We need to fold load after optimizeCmpInstr, since
            // optimizeCmpInstr can enable folding by converting SUB to CMP.
            // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
            // we need it for markUsesInDebugValueAsUndef().
            unsigned FoldedReg = FoldAsLoadDefReg;
            MachineInstr *DefMI = nullptr;
            MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
                                                          FoldAsLoadDefReg,
                                                          DefMI);
            if (FoldMI) {
              // Update LocalMIs since we replaced MI with FoldMI and deleted
              // DefMI.
              DEBUG(dbgs() << "Replacing: " << *MI);
              DEBUG(dbgs() << "     With: " << *FoldMI);
              LocalMIs.erase(MI);
              LocalMIs.erase(DefMI);
              LocalMIs.insert(FoldMI);
              MI->eraseFromParent();
              DefMI->eraseFromParent();
              MRI->markUsesInDebugValueAsUndef(FoldedReg);
              FoldAsLoadDefCandidates.erase(FoldedReg);
              ++NumLoadFold;
              // MI is replaced with FoldMI.
              Changed = true;
              break;
            }
          }
        }
      }
    }
  }

  return Changed;
}
bool ValueTracker::getNextSourceFromCopy(unsigned &SrcIdx,
                                         unsigned &SrcSubReg) {
  assert(Def->isCopy() && "Invalid definition");
  // Copy instructions are supposed to be: Def = Src.
  // If someone breaks this assumption, bad things will happen everywhere.
  assert(Def->getDesc().getNumOperands() == 2 && "Invalid number of operands");

  if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
    // If we look for a different subreg, it means we want a subreg of src.
    // Bail as we do not support composing subregs yet.
    return false;
  // Otherwise, we want the whole source.
  SrcIdx = 1;
  SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
  return true;
}
bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx,
                                            unsigned &SrcSubReg) {
  assert(Def->isBitcast() && "Invalid definition");

  // Bail if there are effects that a plain copy will not expose.
  if (Def->hasUnmodeledSideEffects())
    return false;

  // Bitcasts with more than one def are not supported.
  if (Def->getDesc().getNumDefs() != 1)
    return false;
  if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
    // If we look for a different subreg, it means we want a subreg of the src.
    // Bail as we do not support composing subregs yet.
    return false;

  SrcIdx = Def->getDesc().getNumOperands();
  for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
       ++OpIdx) {
    const MachineOperand &MO = Def->getOperand(OpIdx);
    if (!MO.isReg() || !MO.getReg())
      continue;
    assert(!MO.isDef() && "We should have skipped all the definitions by now");
    if (SrcIdx != EndOpIdx)
      // Multiple sources?
      return false;
    SrcIdx = OpIdx;
  }
  SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
  return true;
}
bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx,
                                                unsigned &SrcSubReg) {
  assert(Def->isRegSequence() && "Invalid definition");

  if (Def->getOperand(DefIdx).getSubReg())
    // If we are composing subregs, bail out.
    // The case we are checking is Def.<subreg> = REG_SEQUENCE.
    // This should almost never happen as the SSA property is tracked at
    // the register level (as opposed to the subreg level).
    // I.e.,
    // Def.sub0 =
    // Def.sub1 =
    // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
    // Def. Thus, it must not be generated.
    // However, some code could theoretically generate a single
    // Def.sub0 (i.e., not defining the other subregs) and we would
    // have this case.
    // If we can ascertain (or force) that this never happens, we could
    // turn that into an assertion.
    return false;

  // We are looking at:
  // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
  // Check if one of the operands defines the subreg we are interested in.
  for (unsigned OpIdx = DefIdx + 1, EndOpIdx = Def->getNumOperands();
       OpIdx != EndOpIdx; OpIdx += 2) {
    const MachineOperand &MOSubIdx = Def->getOperand(OpIdx + 1);
    assert(MOSubIdx.isImm() &&
           "One of the subindex of the reg_sequence is not an immediate");
    if (MOSubIdx.getImm() == DefSubReg) {
      assert(Def->getOperand(OpIdx).isReg() &&
             "One of the source of the reg_sequence is not a register");
      SrcIdx = OpIdx;
      SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
      return true;
    }
  }

  // If the subreg we are tracking is super-defined by another subreg,
  // we could follow this value. However, this would require composing
  // the subregs, and we do not do that for now.
  return false;
}
bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcIdx,
                                                 unsigned &SrcSubReg) {
  assert(Def->isInsertSubreg() && "Invalid definition");
  if (Def->getOperand(DefIdx).getSubReg())
    // If we are composing subregs, bail out.
    // Same remark as getNextSourceFromRegSequence.
    // I.e., this may be turned into an assert.
    return false;

  // We are looking at:
  // Def = INSERT_SUBREG v0, v1, sub1
  // There are two cases:
  // 1. DefSubReg == sub1, get v1.
  // 2. DefSubReg != sub1, the value may be available through v0.

  // #1 Check if the inserted register matches the required sub index.
  unsigned InsertedSubReg = Def->getOperand(3).getImm();
  if (InsertedSubReg == DefSubReg) {
    SrcIdx = 2;
    SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
    return true;
  }
  // #2 Otherwise, if the sub register we are looking for is not partially
  // defined by the inserted element, we can look through the main
  // source (v0).
  // To check the overlapping we need the MRI and the TRI.
  if (!MRI)
    return false;

  const MachineOperand &MODef = Def->getOperand(DefIdx);
  const MachineOperand &MOBase = Def->getOperand(1);
  // If the result register (Def) and the base register (v0) do not
  // have the same register class or if we have to compose
  // sub registers, bail out.
  if (MRI->getRegClass(MODef.getReg()) != MRI->getRegClass(MOBase.getReg()) ||
      MOBase.getSubReg() || MODef.getSubReg())
    return false;

  // Get the TRI and check if the inserted sub register overlaps with the
  // sub register we are tracking.
  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
  if (!TRI ||
      (TRI->getSubRegIndexLaneMask(DefSubReg) &
       TRI->getSubRegIndexLaneMask(InsertedSubReg)) != 0)
    return false;
  // At this point, the value is available in v0 via the same subreg
  // we used for Def.
  SrcIdx = 1;
  SrcSubReg = DefSubReg;
  return true;
}
bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx,
                                                  unsigned &SrcSubReg) {
  assert(Def->isExtractSubreg() && "Invalid definition");
  // We are looking at:
  // Def = EXTRACT_SUBREG v0, sub0

  // Bail if we have to compose sub registers.
  // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
  if (DefSubReg)
    return false;

  // Bail if we have to compose sub registers.
  // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
  if (Def->getOperand(1).getSubReg())
    return false;
  // Otherwise, the value is available in v0.sub0.
  SrcIdx = 1;
  SrcSubReg = Def->getOperand(2).getImm();
  return true;
}
bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx,
                                                unsigned &SrcSubReg) {
  assert(Def->isSubregToReg() && "Invalid definition");
  // We are looking at:
  // Def = SUBREG_TO_REG Imm, v0, sub0

  // Bail if we have to compose sub registers.
  // If DefSubReg != sub0, we would have to check that all the bits
  // we track are included in sub0 and if yes, we would have to
  // determine the right subreg in v0.
  if (DefSubReg != Def->getOperand(3).getImm())
    return false;
  // Bail if we have to compose sub registers.
  // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
  if (Def->getOperand(2).getSubReg())
    return false;

  SrcIdx = 2;
  SrcSubReg = Def->getOperand(3).getImm();
  return true;
}
bool ValueTracker::getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg) {
  assert(Def && "This method needs a valid definition");

  assert(
      (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
      Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
  if (Def->isCopy())
    return getNextSourceFromCopy(SrcIdx, SrcSubReg);
  if (Def->isBitcast())
    return getNextSourceFromBitcast(SrcIdx, SrcSubReg);
  // All the remaining cases involve "complex" instructions.
  // Bail if we did not ask for the advanced tracking.
  if (!UseAdvancedTracking)
    return false;
  if (Def->isRegSequence())
    return getNextSourceFromRegSequence(SrcIdx, SrcSubReg);
  if (Def->isInsertSubreg())
    return getNextSourceFromInsertSubreg(SrcIdx, SrcSubReg);
  if (Def->isExtractSubreg())
    return getNextSourceFromExtractSubreg(SrcIdx, SrcSubReg);
  if (Def->isSubregToReg())
    return getNextSourceFromSubregToReg(SrcIdx, SrcSubReg);
  return false;
}
const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx,
                                                unsigned &SrcSubReg) {
  // If we reach a point where we cannot move up in the use-def chain,
  // there is nothing we can get.
  if (!Def)
    return nullptr;

  const MachineInstr *PrevDef = nullptr;
  // Try to find the next source.
  if (getNextSourceImpl(SrcIdx, SrcSubReg)) {
    // Update definition, definition index, and subregister for the
    // next call of getNextSource.
    const MachineOperand &MO = Def->getOperand(SrcIdx);
    assert(MO.isReg() && !MO.isDef() && "Source is invalid");
    // Update the current register.
    Reg = MO.getReg();
    // Update the return value before moving up in the use-def chain.
    PrevDef = Def;
    // If we can still move up in the use-def chain, move to the next
    // definition.
    if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
      Def = MRI->getVRegDef(Reg);
      DefIdx = MRI->def_begin(Reg).getOperandNo();
      DefSubReg = SrcSubReg;
      return PrevDef;
    }
  }
  // If we end up here, this means we will not be able to find another source
  // for the next iteration.
  // Make sure any new call to getNextSource bails out early by cutting the
  // use-def chain.
  Def = nullptr;
  return PrevDef;
}