1 //===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that collect the Linker Optimization Hint (LOH).
11 // This pass should be run at the very end of the compilation flow, just before
13 // To be useful for the linker, the LOH must be printed into the assembly file.
15 // A LOH describes a sequence of instructions that may be optimized by the
17 // This same sequence cannot be optimized by the compiler because some of
18 // the information will be known at link time.
19 // For instance, consider the following sequence:
20 // L1: adrp xA, sym@PAGE
21 // L2: add xB, xA, sym@PAGEOFF
22 // L3: ldr xC, [xB, #imm]
23 // This sequence can be turned into:
24 // A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB:
25 // L3: ldr xC, sym+#imm
26 // It may also be turned into either the following more efficient
28 // - If sym@PAGEOFF + #imm fits the encoding space of L3.
29 // L1: adrp xA, sym@PAGE
30 // L3: ldr xC, [xB, sym@PAGEOFF + #imm]
31 // - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB:
33 // L3: ldr xC, [xB, #imm]
35 // To be valid a LOH must meet all the requirements needed by all the related
36 // possible linker transformations.
37 // For instance, using the running example, the constraints to emit
38 // ".loh AdrpAddLdr" are:
39 // - L1, L2, and L3 instructions are of the expected type, i.e.,
40 // respectively ADRP, ADD (immediate), and LD.
41 // - The result of L1 is used only by L2.
42 // - The register argument (xA) used in the ADD instruction is defined
44 // - The result of L2 is used only by L3.
45 // - The base address (xB) in L3 is defined only L2.
46 // - The ADRP in L1 and the ADD in L2 must reference the same symbol using
47 // @PAGE/@PAGEOFF with no additional constants
49 // Currently supported LOHs are:
50 // * So called non-ADRP-related:
51 // - .loh AdrpAddLdr L1, L2, L3:
52 // L1: adrp xA, sym@PAGE
53 // L2: add xB, xA, sym@PAGEOFF
54 // L3: ldr xC, [xB, #imm]
55 // - .loh AdrpLdrGotLdr L1, L2, L3:
56 // L1: adrp xA, sym@GOTPAGE
57 // L2: ldr xB, [xA, sym@GOTPAGEOFF]
58 // L3: ldr xC, [xB, #imm]
59 // - .loh AdrpLdr L1, L3:
60 // L1: adrp xA, sym@PAGE
61 // L3: ldr xC, [xA, sym@PAGEOFF]
62 // - .loh AdrpAddStr L1, L2, L3:
63 // L1: adrp xA, sym@PAGE
64 // L2: add xB, xA, sym@PAGEOFF
65 // L3: str xC, [xB, #imm]
66 // - .loh AdrpLdrGotStr L1, L2, L3:
67 // L1: adrp xA, sym@GOTPAGE
68 // L2: ldr xB, [xA, sym@GOTPAGEOFF]
69 // L3: str xC, [xB, #imm]
70 // - .loh AdrpAdd L1, L2:
71 // L1: adrp xA, sym@PAGE
72 // L2: add xB, xA, sym@PAGEOFF
73 // For all these LOHs, L1, L2, L3 form a simple chain:
74 // L1 result is used only by L2 and L2 result by L3.
75 // L3 LOH-related argument is defined only by L2 and L2 LOH-related argument
77 // All these LOHs aim at using more efficient load/store patterns by folding
78 // some instructions used to compute the address directly into the load/store.
80 // * So called ADRP-related:
81 // - .loh AdrpAdrp L2, L1:
82 // L2: ADRP xA, sym1@PAGE
83 // L1: ADRP xA, sym2@PAGE
84 // L2 dominates L1 and xA is not redifined between L2 and L1
85 // This LOH aims at getting rid of redundant ADRP instructions.
87 // The overall design for emitting the LOHs is:
88 // 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo.
89 // 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it:
90 // 1. Associates them a label.
91 // 2. Emits them in a MCStreamer (EmitLOHDirective).
92 // - The MCMachOStreamer records them into the MCAssembler.
93 // - The MCAsmStreamer prints them.
94 // - Other MCStreamers ignore them.
95 // 3. Closes the MCStreamer:
96 // - The MachObjectWriter gets them from the MCAssembler and writes
97 // them in the object file.
98 // - Other ObjectWriters ignore them.
99 //===----------------------------------------------------------------------===//
102 #include "AArch64InstrInfo.h"
103 #include "AArch64MachineFunctionInfo.h"
104 #include "MCTargetDesc/AArch64AddressingModes.h"
105 #include "llvm/ADT/BitVector.h"
106 #include "llvm/ADT/DenseMap.h"
107 #include "llvm/ADT/MapVector.h"
108 #include "llvm/ADT/SetVector.h"
109 #include "llvm/ADT/SmallVector.h"
110 #include "llvm/ADT/Statistic.h"
111 #include "llvm/CodeGen/MachineBasicBlock.h"
112 #include "llvm/CodeGen/MachineDominators.h"
113 #include "llvm/CodeGen/MachineFunctionPass.h"
114 #include "llvm/CodeGen/MachineInstr.h"
115 #include "llvm/CodeGen/MachineInstrBuilder.h"
116 #include "llvm/Support/CommandLine.h"
117 #include "llvm/Support/Debug.h"
118 #include "llvm/Support/ErrorHandling.h"
119 #include "llvm/Support/raw_ostream.h"
120 #include "llvm/Target/TargetInstrInfo.h"
121 #include "llvm/Target/TargetMachine.h"
122 #include "llvm/Target/TargetRegisterInfo.h"
123 using namespace llvm;
125 #define DEBUG_TYPE "aarch64-collect-loh"
128 PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden,
129 cl::desc("Restrict analysis to registers invovled"
134 BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden,
135 cl::desc("Restrict analysis at basic block scope"),
138 STATISTIC(NumADRPSimpleCandidate,
139 "Number of simplifiable ADRP dominate by another");
140 STATISTIC(NumADRPComplexCandidate2,
141 "Number of simplifiable ADRP reachable by 2 defs");
142 STATISTIC(NumADRPComplexCandidate3,
143 "Number of simplifiable ADRP reachable by 3 defs");
144 STATISTIC(NumADRPComplexCandidateOther,
145 "Number of simplifiable ADRP reachable by 4 or more defs");
146 STATISTIC(NumADDToSTRWithImm,
147 "Number of simplifiable STR with imm reachable by ADD");
148 STATISTIC(NumLDRToSTRWithImm,
149 "Number of simplifiable STR with imm reachable by LDR");
150 STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
151 STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
152 STATISTIC(NumADDToLDRWithImm,
153 "Number of simplifiable LDR with imm reachable by ADD");
154 STATISTIC(NumLDRToLDRWithImm,
155 "Number of simplifiable LDR with imm reachable by LDR");
156 STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
157 STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
158 STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
159 STATISTIC(NumCplxLvl1, "Number of complex case of level 1");
160 STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1");
161 STATISTIC(NumCplxLvl2, "Number of complex case of level 2");
162 STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2");
163 STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
164 STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD");
167 void initializeAArch64CollectLOHPass(PassRegistry &);
171 struct AArch64CollectLOH : public MachineFunctionPass {
173 AArch64CollectLOH() : MachineFunctionPass(ID) {
174 initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry());
177 bool runOnMachineFunction(MachineFunction &MF) override;
179 const char *getPassName() const override {
180 return "AArch64 Collect Linker Optimization Hint (LOH)";
183 void getAnalysisUsage(AnalysisUsage &AU) const override {
184 AU.setPreservesAll();
185 MachineFunctionPass::getAnalysisUsage(AU);
186 AU.addRequired<MachineDominatorTree>();
192 /// A set of MachineInstruction.
193 typedef SetVector<const MachineInstr *> SetOfMachineInstr;
194 /// Map a basic block to a set of instructions per register.
195 /// This is used to represent the exposed uses of a basic block
197 typedef MapVector<const MachineBasicBlock *, SetOfMachineInstr *>
198 BlockToSetOfInstrsPerColor;
199 /// Map a basic block to an instruction per register.
200 /// This is used to represent the live-out definitions of a basic block
202 typedef MapVector<const MachineBasicBlock *, const MachineInstr **>
203 BlockToInstrPerColor;
204 /// Map an instruction to a set of instructions. Used to represent the
205 /// mapping def to reachable uses or use to definitions.
206 typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs;
207 /// Map a basic block to a BitVector.
208 /// This is used to record the kill registers per basic block.
209 typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet;
211 /// Map a register to a dense id.
212 typedef DenseMap<unsigned, unsigned> MapRegToId;
213 /// Map a dense id to a register. Used for debug purposes.
214 typedef SmallVector<unsigned, 32> MapIdToReg;
215 } // end anonymous namespace.
217 char AArch64CollectLOH::ID = 0;
219 INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh",
220 "AArch64 Collect Linker Optimization Hint (LOH)", false,
222 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
223 INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh",
224 "AArch64 Collect Linker Optimization Hint (LOH)", false,
227 /// Given a couple (MBB, reg) get the corresponding set of instruction from
228 /// the given "sets".
229 /// If this couple does not reference any set, an empty set is added to "sets"
230 /// for this couple and returned.
231 /// \param nbRegs is used internally allocate some memory. It must be consistent
232 /// with the way sets is used.
233 static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
234 const MachineBasicBlock &MBB, unsigned reg,
236 SetOfMachineInstr *result;
237 BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB);
238 if (it != sets.end())
241 result = sets[&MBB] = new SetOfMachineInstr[nbRegs];
246 /// Given a couple (reg, MI) get the corresponding set of instructions from the
247 /// the given "sets".
248 /// This is used to get the uses record in sets of a definition identified by
249 /// MI and reg, i.e., MI defines reg.
250 /// If the couple does not reference anything, an empty set is added to
252 /// \pre set[reg] is valid.
253 static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg,
254 const MachineInstr &MI) {
255 return sets[reg][&MI];
258 /// Same as getUses but does not modify the input map: sets.
259 /// \return NULL if the couple (reg, MI) is not in sets.
260 static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
261 const MachineInstr &MI) {
262 InstrToInstrs::const_iterator Res = sets[reg].find(&MI);
263 if (Res != sets[reg].end())
264 return &(Res->second);
268 /// Initialize the reaching definition algorithm:
269 /// For each basic block BB in MF, record:
271 /// - its reachable uses (uses that are exposed to BB's predecessors).
272 /// - its the generated definitions.
273 /// \param DummyOp if not NULL, specifies a Dummy Operation to be added to
274 /// the list of uses of exposed defintions.
275 /// \param ADRPMode specifies to only consider ADRP instructions for generated
276 /// definition. It also consider definitions of ADRP instructions as uses and
277 /// ignore other uses. The ADRPMode is used to collect the information for LHO
278 /// that involve ADRP operation only.
279 static void initReachingDef(MachineFunction &MF,
280 InstrToInstrs *ColorOpToReachedUses,
281 BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
282 BlockToSetOfInstrsPerColor &ReachableUses,
283 const MapRegToId &RegToId,
284 const MachineInstr *DummyOp, bool ADRPMode) {
285 const TargetMachine &TM = MF.getTarget();
286 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
288 unsigned NbReg = RegToId.size();
290 for (MachineBasicBlock &MBB : MF) {
291 const MachineInstr **&BBGen = Gen[&MBB];
292 BBGen = new const MachineInstr *[NbReg];
293 memset(BBGen, 0, sizeof(const MachineInstr *) * NbReg);
295 BitVector &BBKillSet = Kill[&MBB];
296 BBKillSet.resize(NbReg);
297 for (const MachineInstr &MI : MBB) {
298 bool IsADRP = MI.getOpcode() == AArch64::ADRP;
300 // Process uses first.
301 if (IsADRP || !ADRPMode)
302 for (const MachineOperand &MO : MI.operands()) {
303 // Treat ADRP def as use, as the goal of the analysis is to find
304 // ADRP defs reached by other ADRP defs.
305 if (!MO.isReg() || (!ADRPMode && !MO.isUse()) ||
306 (ADRPMode && (!IsADRP || !MO.isDef())))
308 unsigned CurReg = MO.getReg();
309 MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
310 if (ItCurRegId == RegToId.end())
312 CurReg = ItCurRegId->second;
314 // if CurReg has not been defined, this use is reachable.
315 if (!BBGen[CurReg] && !BBKillSet.test(CurReg))
316 getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI);
317 // current basic block definition for this color, if any, is in Gen.
319 getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI);
323 for (const MachineOperand &MO : MI.operands()) {
326 // Clobbers kill the related colors.
327 const uint32_t *PreservedRegs = MO.getRegMask();
329 // Set generated regs.
330 for (const auto Entry : RegToId) {
331 unsigned Reg = Entry.second;
332 // Use the global register ID when querying APIs external to this
334 if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) {
335 // Do not register clobbered definition for no ADRP.
336 // This definition is not used anyway (otherwise register
337 // allocation is wrong).
338 BBGen[Reg] = ADRPMode ? &MI : nullptr;
344 // Process register defs.
345 for (const MachineOperand &MO : MI.operands()) {
346 if (!MO.isReg() || !MO.isDef())
348 unsigned CurReg = MO.getReg();
349 MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
350 if (ItCurRegId == RegToId.end())
353 for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) {
354 MapRegToId::const_iterator ItRegId = RegToId.find(*AI);
355 assert(ItRegId != RegToId.end() &&
356 "Sub-register of an "
357 "involved register, not recorded as involved!");
358 BBKillSet.set(ItRegId->second);
359 BBGen[ItRegId->second] = &MI;
361 BBGen[ItCurRegId->second] = &MI;
365 // If we restrict our analysis to basic block scope, conservatively add a
367 // use for each generated value.
368 if (!ADRPMode && DummyOp && !MBB.succ_empty())
369 for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg)
371 getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp);
375 /// Reaching def core algorithm:
376 /// while an Out has changed
379 /// In[bb][color] = U Out[bb.predecessors][color]
380 /// insert reachableUses[bb][color] in each in[bb][color]
383 /// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
384 static void reachingDefAlgorithm(MachineFunction &MF,
385 InstrToInstrs *ColorOpToReachedUses,
386 BlockToSetOfInstrsPerColor &In,
387 BlockToSetOfInstrsPerColor &Out,
388 BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
389 BlockToSetOfInstrsPerColor &ReachableUses,
394 for (MachineBasicBlock &MBB : MF) {
396 for (CurReg = 0; CurReg < NbReg; ++CurReg) {
397 SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
398 SetOfMachineInstr &BBReachableUses =
399 getSet(ReachableUses, MBB, CurReg, NbReg);
400 SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
401 unsigned Size = BBOutSet.size();
402 // In[bb][color] = U Out[bb.predecessors][color]
403 for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
404 SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
405 BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
407 // insert reachableUses[bb][color] in each in[bb][color] op.reachedses
408 for (const MachineInstr *MI : BBInSet) {
409 SetOfMachineInstr &OpReachedUses =
410 getUses(ColorOpToReachedUses, CurReg, *MI);
411 OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end());
413 // Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
414 if (!Kill[&MBB].test(CurReg))
415 BBOutSet.insert(BBInSet.begin(), BBInSet.end());
416 if (Gen[&MBB][CurReg])
417 BBOutSet.insert(Gen[&MBB][CurReg]);
418 HasChanged |= BBOutSet.size() != Size;
421 } while (HasChanged);
424 /// Release all memory dynamically allocated during the reaching
425 /// definition algorithm.
426 static void finitReachingDef(BlockToSetOfInstrsPerColor &In,
427 BlockToSetOfInstrsPerColor &Out,
428 BlockToInstrPerColor &Gen,
429 BlockToSetOfInstrsPerColor &ReachableUses) {
434 for (auto &IT : ReachableUses)
440 /// Reaching definition algorithm.
441 /// \param MF function on which the algorithm will operate.
442 /// \param[out] ColorOpToReachedUses will contain the result of the reaching
444 /// \param ADRPMode specify whether the reaching def algorithm should be tuned
445 /// for ADRP optimization. \see initReachingDef for more details.
446 /// \param DummyOp if not NULL, the algorithm will work at
447 /// basic block scope and will set for every exposed definition a use to
449 /// \pre ColorOpToReachedUses is an array of at least number of registers of
451 static void reachingDef(MachineFunction &MF,
452 InstrToInstrs *ColorOpToReachedUses,
453 const MapRegToId &RegToId, bool ADRPMode = false,
454 const MachineInstr *DummyOp = nullptr) {
456 // For each basic block.
457 // Out: a set per color of definitions that reach the
458 // out boundary of this block.
459 // In: Same as Out but for in boundary.
460 // Gen: generated color in this block (one operation per color).
461 // Kill: register set of killed color in this block.
462 // ReachableUses: a set per color of uses (operation) reachable
463 // for "In" definitions.
464 BlockToSetOfInstrsPerColor Out, In, ReachableUses;
465 BlockToInstrPerColor Gen;
468 // Initialize Gen, kill and reachableUses.
469 initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId,
474 reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill,
475 ReachableUses, RegToId.size());
478 finitReachingDef(In, Out, Gen, ReachableUses);
482 /// print the result of the reaching definition algorithm.
483 static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses,
484 unsigned NbReg, const TargetRegisterInfo *TRI,
485 const MapIdToReg &IdToReg) {
487 for (CurReg = 0; CurReg < NbReg; ++CurReg) {
488 if (ColorOpToReachedUses[CurReg].empty())
490 DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n");
492 for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
493 DEBUG(dbgs() << "Def:\n");
494 DEBUG(DefsIt.first->print(dbgs()));
495 DEBUG(dbgs() << "Reachable uses:\n");
496 for (const MachineInstr *MI : DefsIt.second) {
497 DEBUG(MI->print(dbgs()));
504 /// Answer the following question: Can Def be one of the definition
505 /// involved in a part of a LOH?
506 static bool canDefBePartOfLOH(const MachineInstr *Def) {
507 unsigned Opc = Def->getOpcode();
508 // Accept ADRP, ADDLow and LOADGot.
514 case AArch64::ADDXri:
515 // Check immediate to see if the immediate is an address.
516 switch (Def->getOperand(2).getType()) {
519 case MachineOperand::MO_GlobalAddress:
520 case MachineOperand::MO_JumpTableIndex:
521 case MachineOperand::MO_ConstantPoolIndex:
522 case MachineOperand::MO_BlockAddress:
525 case AArch64::LDRXui:
526 // Check immediate to see if the immediate is an address.
527 switch (Def->getOperand(2).getType()) {
530 case MachineOperand::MO_GlobalAddress:
538 /// Check whether the given instruction can the end of a LOH chain involving a
540 static bool isCandidateStore(const MachineInstr *Instr) {
541 switch (Instr->getOpcode()) {
544 case AArch64::STRBui:
545 case AArch64::STRHui:
546 case AArch64::STRWui:
547 case AArch64::STRXui:
548 case AArch64::STRSui:
549 case AArch64::STRDui:
550 case AArch64::STRQui:
551 // In case we have str xA, [xA, #imm], this is two different uses
552 // of xA and we cannot fold, otherwise the xA stored may be wrong,
553 // even if #imm == 0.
554 if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg())
560 /// Given the result of a reaching definition algorithm in ColorOpToReachedUses,
561 /// Build the Use to Defs information and filter out obvious non-LOH candidates.
562 /// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions.
563 /// In non-ADRPMode, non-LOH candidates are "uses" with several definition,
564 /// i.e., no simple chain.
565 /// \param ADRPMode -- \see initReachingDef.
566 static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
567 const InstrToInstrs *ColorOpToReachedUses,
568 const MapRegToId &RegToId,
569 bool ADRPMode = false) {
571 SetOfMachineInstr NotCandidate;
572 unsigned NbReg = RegToId.size();
573 MapRegToId::const_iterator EndIt = RegToId.end();
574 for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) {
575 // If this color is never defined, continue.
576 if (ColorOpToReachedUses[CurReg].empty())
579 for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
580 for (const MachineInstr *MI : DefsIt.second) {
581 const MachineInstr *Def = DefsIt.first;
582 MapRegToId::const_iterator It;
583 // if all the reaching defs are not adrp, this use will not be
585 if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) ||
586 (!ADRPMode && !canDefBePartOfLOH(Def)) ||
587 (!ADRPMode && isCandidateStore(MI) &&
588 // store are LOH candidate iff the end of the chain is used as
590 ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt ||
591 It->second != CurReg))) {
592 NotCandidate.insert(MI);
595 // Do not consider self reaching as a simplifiable case for ADRP.
596 if (!ADRPMode || MI != DefsIt.first) {
597 UseToReachingDefs[MI].insert(DefsIt.first);
598 // If UsesIt has several reaching definitions, it is not
599 // candidate for simplificaton in non-ADRPMode.
600 if (!ADRPMode && UseToReachingDefs[MI].size() > 1)
601 NotCandidate.insert(MI);
606 for (const MachineInstr *Elem : NotCandidate) {
607 DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n");
608 // It would have been better if we could just remove the entry
609 // from the map. Because of that, we have to filter the garbage
610 // (second.empty) in the subsequence analysis.
611 UseToReachingDefs[Elem].clear();
615 /// Based on the use to defs information (in ADRPMode), compute the
616 /// opportunities of LOH ADRP-related.
617 static void computeADRP(const InstrToInstrs &UseToDefs,
618 AArch64FunctionInfo &AArch64FI,
619 const MachineDominatorTree *MDT) {
620 DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
621 for (const auto &Entry : UseToDefs) {
622 unsigned Size = Entry.second.size();
626 const MachineInstr *L2 = *Entry.second.begin();
627 const MachineInstr *L1 = Entry.first;
628 if (!MDT->dominates(L2, L1)) {
629 DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
633 DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
634 SmallVector<const MachineInstr *, 2> Args;
637 AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, Args);
638 ++NumADRPSimpleCandidate;
642 ++NumADRPComplexCandidate2;
644 ++NumADRPComplexCandidate3;
646 ++NumADRPComplexCandidateOther;
648 // if Size < 1, the use should have been removed from the candidates
649 assert(Size >= 1 && "No reaching defs for that use!");
653 /// Check whether the given instruction can be the end of a LOH chain
654 /// involving a load.
655 static bool isCandidateLoad(const MachineInstr *Instr) {
656 switch (Instr->getOpcode()) {
659 case AArch64::LDRSBWui:
660 case AArch64::LDRSBXui:
661 case AArch64::LDRSHWui:
662 case AArch64::LDRSHXui:
663 case AArch64::LDRSWui:
664 case AArch64::LDRBui:
665 case AArch64::LDRHui:
666 case AArch64::LDRWui:
667 case AArch64::LDRXui:
668 case AArch64::LDRSui:
669 case AArch64::LDRDui:
670 case AArch64::LDRQui:
671 if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)
679 /// Check whether the given instruction can load a litteral.
680 static bool supportLoadFromLiteral(const MachineInstr *Instr) {
681 switch (Instr->getOpcode()) {
684 case AArch64::LDRSWui:
685 case AArch64::LDRWui:
686 case AArch64::LDRXui:
687 case AArch64::LDRSui:
688 case AArch64::LDRDui:
689 case AArch64::LDRQui:
696 /// Check whether the given instruction is a LOH candidate.
697 /// \param UseToDefs is used to check that Instr is at the end of LOH supported
699 /// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are
700 /// already been filtered out.
701 static bool isCandidate(const MachineInstr *Instr,
702 const InstrToInstrs &UseToDefs,
703 const MachineDominatorTree *MDT) {
704 if (!isCandidateLoad(Instr) && !isCandidateStore(Instr))
707 const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin();
708 if (Def->getOpcode() != AArch64::ADRP) {
709 // At this point, Def is ADDXri or LDRXui of the right type of
710 // symbol, because we filtered out the uses that were not defined
711 // by these kind of instructions (+ ADRP).
713 // Check if this forms a simple chain: each intermediate node must
714 // dominates the next one.
715 if (!MDT->dominates(Def, Instr))
717 // Move one node up in the simple chain.
718 if (UseToDefs.find(Def) ==
720 // The map may contain garbage we have to ignore.
722 UseToDefs.find(Def)->second.empty())
725 Def = *UseToDefs.find(Def)->second.begin();
727 // Check if we reached the top of the simple chain:
729 // - check the simple chain property: each intermediate node must
730 // dominates the next one.
731 if (Def->getOpcode() == AArch64::ADRP)
732 return MDT->dominates(Def, Instr);
736 static bool registerADRCandidate(const MachineInstr &Use,
737 const InstrToInstrs &UseToDefs,
738 const InstrToInstrs *DefsPerColorToUses,
739 AArch64FunctionInfo &AArch64FI,
740 SetOfMachineInstr *InvolvedInLOHs,
741 const MapRegToId &RegToId) {
742 // Look for opportunities to turn ADRP -> ADD or
743 // ADRP -> LDR GOTPAGEOFF into ADR.
744 // If ADRP has more than one use. Give up.
745 if (Use.getOpcode() != AArch64::ADDXri &&
746 (Use.getOpcode() != AArch64::LDRXui ||
747 !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT)))
749 InstrToInstrs::const_iterator It = UseToDefs.find(&Use);
750 // The map may contain garbage that we need to ignore.
751 if (It == UseToDefs.end() || It->second.empty())
753 const MachineInstr &Def = **It->second.begin();
754 if (Def.getOpcode() != AArch64::ADRP)
756 // Check the number of users of ADRP.
757 const SetOfMachineInstr *Users =
758 getUses(DefsPerColorToUses,
759 RegToId.find(Def.getOperand(0).getReg())->second, Def);
760 if (Users->size() > 1) {
761 ++NumADRComplexCandidate;
764 ++NumADRSimpleCandidate;
765 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) &&
766 "ADRP already involved in LOH.");
767 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) &&
768 "ADD already involved in LOH.");
769 DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n');
771 SmallVector<const MachineInstr *, 2> Args;
772 Args.push_back(&Def);
773 Args.push_back(&Use);
775 AArch64FI.addLOHDirective(Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd
781 /// Based on the use to defs information (in non-ADRPMode), compute the
782 /// opportunities of LOH non-ADRP-related
783 static void computeOthers(const InstrToInstrs &UseToDefs,
784 const InstrToInstrs *DefsPerColorToUses,
785 AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId,
786 const MachineDominatorTree *MDT) {
787 SetOfMachineInstr *InvolvedInLOHs = nullptr;
789 SetOfMachineInstr InvolvedInLOHsStorage;
790 InvolvedInLOHs = &InvolvedInLOHsStorage;
792 DEBUG(dbgs() << "*** Compute LOH for Others\n");
793 // ADRP -> ADD/LDR -> LDR/STR pattern.
794 // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern.
796 // FIXME: When the statistics are not important,
797 // This initial filtering loop can be merged into the next loop.
798 // Currently, we didn't do it to have the same code for both DEBUG and
799 // NDEBUG builds. Indeed, the iterator of the second loop would need
801 SetOfMachineInstr PotentialCandidates;
802 SetOfMachineInstr PotentialADROpportunities;
803 for (auto &Use : UseToDefs) {
804 // If no definition is available, this is a non candidate.
805 if (Use.second.empty())
807 // Keep only instructions that are load or store and at the end of
808 // a ADRP -> ADD/LDR/Nothing chain.
809 // We already filtered out the no-chain cases.
810 if (!isCandidate(Use.first, UseToDefs, MDT)) {
811 PotentialADROpportunities.insert(Use.first);
814 PotentialCandidates.insert(Use.first);
817 // Make the following distinctions for statistics as the linker does
818 // know how to decode instructions:
819 // - ADD/LDR/Nothing make there different patterns.
820 // - LDR/STR make two different patterns.
821 // Hence, 6 - 1 base patterns.
822 // (because ADRP-> Nothing -> STR is not simplifiable)
824 // The linker is only able to have a simple semantic, i.e., if pattern A
826 // However, we want to see the opportunity we may miss if we were able to
827 // catch more complex cases.
829 // PotentialCandidates are result of a chain ADRP -> ADD/LDR ->
830 // A potential candidate becomes a candidate, if its current immediate
831 // operand is zero and all nodes of the chain have respectively only one user
833 SetOfMachineInstr DefsOfPotentialCandidates;
835 for (const MachineInstr *Candidate : PotentialCandidates) {
836 // Get the definition of the candidate i.e., ADD or LDR.
837 const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
838 // Record the elements of the chain.
839 const MachineInstr *L1 = Def;
840 const MachineInstr *L2 = nullptr;
841 unsigned ImmediateDefOpc = Def->getOpcode();
842 if (Def->getOpcode() != AArch64::ADRP) {
843 // Check the number of users of this node.
844 const SetOfMachineInstr *Users =
845 getUses(DefsPerColorToUses,
846 RegToId.find(Def->getOperand(0).getReg())->second, *Def);
847 if (Users->size() > 1) {
849 // if all the uses of this def are in potential candidate, this is
850 // a complex candidate of level 2.
851 bool IsLevel2 = true;
852 for (const MachineInstr *MI : *Users) {
853 if (!PotentialCandidates.count(MI)) {
862 PotentialADROpportunities.insert(Def);
866 Def = *UseToDefs.find(Def)->second.begin();
868 } // else the element in the middle of the chain is nothing, thus
869 // Def already contains the first element of the chain.
871 // Check the number of users of the first node in the chain, i.e., ADRP
872 const SetOfMachineInstr *Users =
873 getUses(DefsPerColorToUses,
874 RegToId.find(Def->getOperand(0).getReg())->second, *Def);
875 if (Users->size() > 1) {
877 // if all the uses of this def are in the defs of the potential candidate,
878 // this is a complex candidate of level 1
879 if (DefsOfPotentialCandidates.empty()) {
881 DefsOfPotentialCandidates = PotentialCandidates;
882 for (const MachineInstr *Candidate : PotentialCandidates) {
883 if (!UseToDefs.find(Candidate)->second.empty())
884 DefsOfPotentialCandidates.insert(
885 *UseToDefs.find(Candidate)->second.begin());
889 for (auto &Use : *Users) {
890 if (!DefsOfPotentialCandidates.count(Use)) {
902 bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
903 // If the chain is three instructions long and ldr is the second element,
904 // then this ldr must load form GOT, otherwise this is not a correct chain.
905 if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != AArch64II::MO_GOT)
907 SmallVector<const MachineInstr *, 3> Args;
909 if (isCandidateLoad(Candidate)) {
911 // At this point, the candidate LOH indicates that the ldr instruction
912 // may use a direct access to the symbol. There is not such encoding
913 // for loads of byte and half.
914 if (!supportLoadFromLiteral(Candidate))
917 DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate
919 Kind = MCLOH_AdrpLdr;
921 Args.push_back(Candidate);
922 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
923 "L1 already involved in LOH.");
924 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
925 "Candidate already involved in LOH.");
928 DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
929 << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
932 Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr;
935 Args.push_back(Candidate);
937 PotentialADROpportunities.remove(L2);
938 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
939 "L1 already involved in LOH.");
940 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
941 "L2 already involved in LOH.");
942 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
943 "Candidate already involved in LOH.");
945 // get the immediate of the load
946 if (Candidate->getOperand(2).getImm() == 0)
947 if (ImmediateDefOpc == AArch64::ADDXri)
951 else if (ImmediateDefOpc == AArch64::ADDXri)
952 ++NumADDToLDRWithImm;
954 ++NumLDRToLDRWithImm;
958 if (ImmediateDefOpc == AArch64::ADRP)
962 DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
963 << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
966 Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr;
969 Args.push_back(Candidate);
971 PotentialADROpportunities.remove(L2);
972 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
973 "L1 already involved in LOH.");
974 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
975 "L2 already involved in LOH.");
976 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
977 "Candidate already involved in LOH.");
979 // get the immediate of the store
980 if (Candidate->getOperand(2).getImm() == 0)
981 if (ImmediateDefOpc == AArch64::ADDXri)
985 else if (ImmediateDefOpc == AArch64::ADDXri)
986 ++NumADDToSTRWithImm;
988 ++NumLDRToSTRWithImm;
992 AArch64FI.addLOHDirective(Kind, Args);
995 // Now, we grabbed all the big patterns, check ADR opportunities.
996 for (const MachineInstr *Candidate : PotentialADROpportunities)
997 registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI,
998 InvolvedInLOHs, RegToId);
1001 /// Look for every register defined by potential LOHs candidates.
1002 /// Map these registers with dense id in @p RegToId and vice-versa in
1003 /// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
1004 static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
1005 MapIdToReg &IdToReg,
1006 const TargetRegisterInfo *TRI) {
1007 unsigned CurRegId = 0;
1008 if (!PreCollectRegister) {
1009 unsigned NbReg = TRI->getNumRegs();
1010 for (; CurRegId < NbReg; ++CurRegId) {
1011 RegToId[CurRegId] = CurRegId;
1012 DEBUG(IdToReg.push_back(CurRegId));
1013 DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches"));
1018 DEBUG(dbgs() << "** Collect Involved Register\n");
1019 for (const auto &MBB : MF) {
1020 for (const MachineInstr &MI : MBB) {
1021 if (!canDefBePartOfLOH(&MI))
1025 for (MachineInstr::const_mop_iterator IO = MI.operands_begin(),
1026 IOEnd = MI.operands_end();
1027 IO != IOEnd; ++IO) {
1028 if (!IO->isReg() || !IO->isDef())
1030 unsigned CurReg = IO->getReg();
1031 for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI)
1032 if (RegToId.find(*AI) == RegToId.end()) {
1033 DEBUG(IdToReg.push_back(*AI);
1034 assert(IdToReg[CurRegId] == *AI &&
1035 "Reg index mismatches insertion index."));
1036 RegToId[*AI] = CurRegId++;
1037 DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n');
1044 bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
1045 const TargetMachine &TM = MF.getTarget();
1046 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1047 const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
1051 AArch64FunctionInfo *AArch64FI = MF.getInfo<AArch64FunctionInfo>();
1052 assert(AArch64FI && "No MachineFunctionInfo for this function!");
1054 DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n');
1056 collectInvolvedReg(MF, RegToId, IdToReg, TRI);
1057 if (RegToId.empty())
1060 MachineInstr *DummyOp = nullptr;
1061 if (BasicBlockScopeOnly) {
1062 const AArch64InstrInfo *TII =
1063 static_cast<const AArch64InstrInfo *>(TM.getInstrInfo());
1064 // For local analysis, create a dummy operation to record uses that are not
1066 DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
1069 unsigned NbReg = RegToId.size();
1070 bool Modified = false;
1073 InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg];
1075 // Compute the reaching def in ADRP mode, meaning ADRP definitions
1076 // are first considered as uses.
1077 reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp);
1078 DEBUG(dbgs() << "ADRP reaching defs\n");
1079 DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
1081 // Translate the definition to uses map into a use to definitions map to ease
1082 // statistic computation.
1083 InstrToInstrs ADRPToReachingDefs;
1084 reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true);
1086 // Compute LOH for ADRP.
1087 computeADRP(ADRPToReachingDefs, *AArch64FI, MDT);
1088 delete[] ColorOpToReachedUses;
1090 // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern.
1091 ColorOpToReachedUses = new InstrToInstrs[NbReg];
1093 // first perform a regular reaching def analysis.
1094 reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp);
1095 DEBUG(dbgs() << "All reaching defs\n");
1096 DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
1098 // Turn that into a use to defs to ease statistic computation.
1099 InstrToInstrs UsesToReachingDefs;
1100 reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false);
1102 // Compute other than AdrpAdrp LOH.
1103 computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId,
1105 delete[] ColorOpToReachedUses;
1107 if (BasicBlockScopeOnly)
1108 MF.DeleteMachineInstr(DummyOp);
1113 /// createAArch64CollectLOHPass - returns an instance of the Statistic for
1114 /// linker optimization pass.
1115 FunctionPass *llvm::createAArch64CollectLOHPass() {
1116 return new AArch64CollectLOH();