1 //===--- HexagonStoreWidening.cpp------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
9 // Replace sequences of "narrow" stores to adjacent memory locations with
10 // fewer "wide" stores that have the same effect.
11 // For example, replace:
12 // S4_storeirb_io %vreg100, 0, 0 ; store-immediate-byte
13 // S4_storeirb_io %vreg100, 1, 0 ; store-immediate-byte
14 // with
15 // S4_storeirh_io %vreg100, 0, 0 ; store-immediate-halfword
16 // The above is the general idea. The actual cases handled by the code
17 // may be a bit more complex.
18 // The purpose of this pass is to reduce the number of outstanding stores,
19 // or as one could say, "reduce store queue pressure". Also, wide stores
20 // mean fewer stores, and since there are only two memory instructions allowed
21 // per packet, it also means fewer packets, and ultimately fewer cycles.
22 //===---------------------------------------------------------------------===//
24 #define DEBUG_TYPE "hexagon-widen-stores"
26 #include "HexagonTargetMachine.h"
28 #include "llvm/PassSupport.h"
29 #include "llvm/Analysis/AliasAnalysis.h"
30 #include "llvm/CodeGen/Passes.h"
31 #include "llvm/CodeGen/MachineFunction.h"
32 #include "llvm/CodeGen/MachineFunctionPass.h"
33 #include "llvm/CodeGen/MachineInstrBuilder.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/MC/MCInstrDesc.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "llvm/Target/TargetMachine.h"
39 #include "llvm/Target/TargetRegisterInfo.h"
40 #include "llvm/Target/TargetInstrInfo.h"
// Forward declarations: the factory for this pass (defined at the end of
// this file) and the pass-registry initializer that the pass constructor
// calls.
48 FunctionPass *createHexagonStoreWidening();
49 void initializeHexagonStoreWideningPass(PassRegistry&);
// Machine-function pass that replaces groups of narrow store-immediate
// instructions to adjacent memory with fewer, wider stores.
53 struct HexagonStoreWidening : public MachineFunctionPass {
// Target instruction info, target register info and the function's
// register info; all three are assigned at the start of
// runOnMachineFunction.
54 const HexagonInstrInfo *TII;
55 const HexagonRegisterInfo *TRI;
56 const MachineRegisterInfo *MRI;
// Registers the pass with the global pass registry on construction.
62 HexagonStoreWidening() : MachineFunctionPass(ID) {
63 initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry());
// Pass entry point: processes every basic block of MF.
66 bool runOnMachineFunction(MachineFunction &MF) override;
68 const char *getPassName() const override {
69 return "Hexagon Store Widening";
// Alias analysis results are both required and preserved by this pass.
72 void getAnalysisUsage(AnalysisUsage &AU) const override {
73 AU.addRequired<AAResultsWrapperPass>();
74 AU.addPreserved<AAResultsWrapperPass>();
75 MachineFunctionPass::getAnalysisUsage(AU);
// Returns true for the store opcodes this pass is willing to widen.
78 static bool handledStoreType(const MachineInstr *MI);
// Upper bound, in bytes, on the size of a widened store; passed to
// selectStores as MaxSize.
81 static const int MaxWideSize = 4;
// A group of instructions, and a list of such groups.
83 typedef std::vector<MachineInstr*> InstrGroup;
84 typedef std::vector<InstrGroup> InstrGroupList;
// Alias queries of a memory operand / an instruction against a store group.
86 bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO);
87 bool instrAliased(InstrGroup &Stores, const MachineInstr *MI);
// Construction of store groups from the instructions of a basic block.
88 void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin,
89 InstrGroup::iterator End, InstrGroup &Group);
90 void createStoreGroups(MachineBasicBlock &MBB,
91 InstrGroupList &StoreGroups);
// Widening pipeline: select a run of stores (OG), build the replacement
// instructions (NG), and substitute NG for OG in the block.
92 bool processBasicBlock(MachineBasicBlock &MBB);
93 bool processStoreGroup(InstrGroup &Group);
94 bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End,
95 InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
96 bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
97 bool replaceStores(InstrGroup &OG, InstrGroup &NG);
// True when S2 stores to the memory immediately following S1's.
98 bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2);
106 // Some local helper functions...
// Looks at operand 0 of the store MI, which is expected to be the register
// holding the base address (enforced by the assertion).
107 unsigned getBaseAddressRegister(const MachineInstr *MI) {
108 const MachineOperand &MO = MI->getOperand(0);
109 assert(MO.isReg() && "Expecting register operand");
// Offset of a handled store. MI must satisfy handledStoreType (asserted).
// For the three store-immediate opcodes the offset is operand 1, which is
// expected to be an immediate.
113 int64_t getStoreOffset(const MachineInstr *MI) {
114 unsigned OpC = MI->getOpcode();
115 assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode");
118 case Hexagon::S4_storeirb_io:
119 case Hexagon::S4_storeirh_io:
120 case Hexagon::S4_storeiri_io: {
121 const MachineOperand &MO = MI->getOperand(1);
122 assert(MO.isImm() && "Expecting immediate offset");
// Reached only if an opcode accepted by handledStoreType has no case above.
127 llvm_unreachable("Store offset calculation missing for a handled opcode");
// Returns the first memory operand attached to MI. MI must carry at least
// one memory operand (asserted).
131 const MachineMemOperand &getStoreTarget(const MachineInstr *MI) {
132 assert(!MI->memoperands_empty() && "Expecting memory operands");
133 return **MI->memoperands_begin();
// Static pass ID; it is handed to the MachineFunctionPass base class by the
// HexagonStoreWidening constructor.
139 char HexagonStoreWidening::ID = 0;
// Register the pass, together with its dependency on alias analysis, under
// the command-line name "hexagon-widen-stores". The description string is
// kept identical in the BEGIN and END macros and matches getPassName().
141 INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
142 "Hexagon Store Widening", false, false)
143 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
144 INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
145 "Hexagon Store Widening", false, false)
148 // Filtering function: any stores whose opcodes are not "approved" of by
149 // this function will not be subjected to widening.
// The approved opcodes are the byte, halfword and word store-immediate
// instructions, and only when operand 0 (the base address) is a register.
150 inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) {
151 // For now, only handle stores of immediate values.
152 // Also, reject stores to stack slots.
153 unsigned Opc = MI->getOpcode();
155 case Hexagon::S4_storeirb_io:
156 case Hexagon::S4_storeirh_io:
157 case Hexagon::S4_storeiri_io:
158 // Base address must be a register. (Implement FI later.)
159 return MI->getOperand(0).isReg();
166 // Check if the machine memory operand MMO is aliased with any of the
167 // stores in the store group Stores.
168 bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
169 const MachineMemOperand &MMO) {
// Memory location described by MMO: its IR value, size and AA metadata.
173 MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo());
175 for (auto SI : Stores) {
// Compare against the first memory operand of each store in the group.
176 const MachineMemOperand &SMO = getStoreTarget(SI);
180 MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo());
// Ask alias analysis whether the two locations may overlap.
181 if (AA->alias(L, SL))
189 // Check if the machine instruction MI accesses any storage aliased with
190 // any store in the group Stores.
// Each memory operand of MI is tested with the MachineMemOperand overload.
191 bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
192 const MachineInstr *MI) {
193 for (auto &I : MI->memoperands())
194 if (instrAliased(Stores, *I))
200 // Inspect a machine basic block, and generate store groups out of stores
201 // encountered in the block.
203 // A store group is a group of stores that use the same base register,
204 // and which can be reordered within that group without altering the
205 // semantics of the program. A single store group could be widened as
206 // a whole, if there existed a single store instruction with the same
207 // semantics as the entire group. In many cases, a single store group
208 // may need more than one wide store.
// The groups found are appended to StoreGroups.
209 void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB,
210 InstrGroupList &StoreGroups) {
213 // Copy all instruction pointers from the basic block to a temporary
214 // list. This will allow operating on the list, and modifying its
215 // elements without affecting the basic block.
217 AllInsns.push_back(&I);
219 // Traverse all instructions in the AllInsns list, and if we encounter
220 // a store, then try to create a store group starting at that instruction
221 // i.e. a sequence of independent stores that can be widened.
222 for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) {
223 MachineInstr *MI = *I;
224 // Skip null pointers (processed instructions).
225 if (!MI || !handledStoreType(MI))
228 // Found a store. Try to create a store group.
// MI is the base store; candidates are taken from the instructions after it.
230 createStoreGroup(MI, I+1, E, G);
232 StoreGroups.push_back(G);
237 // Create a single store group. The stores need to be independent between
238 // themselves, and also there cannot be other instructions between them
239 // that could read or modify storage being stored into.
// BaseStore seeds the group; [Begin, End) is the range of instructions that
// follow it and are scanned for further members. The result is built in
// Group.
240 void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore,
241 InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) {
242 assert(handledStoreType(BaseStore) && "Unexpected instruction");
243 unsigned BaseReg = getBaseAddressRegister(BaseStore);
246 Group.push_back(BaseStore);
248 for (auto I = Begin; I != End; ++I) {
249 MachineInstr *MI = *I;
253 if (handledStoreType(MI)) {
254 // If this store instruction is aliased with anything already in the
255 // group, terminate the group now.
256 if (instrAliased(Group, getStoreTarget(MI)))
258 // If this store is aliased to any of the memory instructions we have
259 // seen so far (that are not a part of this group), terminate the group.
260 if (instrAliased(Other, getStoreTarget(MI)))
// Base register of the candidate store, for comparison with BaseReg.
263 unsigned BR = getBaseAddressRegister(MI);
271 // Assume calls are aliased to everything.
272 if (MI->isCall() || MI->hasUnmodeledSideEffects())
// Any other memory access is checked for ordering constraints and for
// aliasing with the stores collected so far.
275 if (MI->mayLoad() || MI->mayStore()) {
276 if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI))
284 // Check if store instructions S1 and S2 are adjacent. More precisely,
285 // S2 has to access memory immediately following that accessed by S1.
286 bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1,
287 const MachineInstr *S2) {
// Only stores accepted by handledStoreType are examined further.
288 if (!handledStoreType(S1) || !handledStoreType(S2))
291 const MachineMemOperand &S1MO = getStoreTarget(S1);
293 // Currently only handling immediate stores.
294 int Off1 = S1->getOperand(1).getImm();
295 int Off2 = S2->getOperand(1).getImm();
// Adjacent means Off2 == Off1 + size of S1's access. For a non-negative
// Off1 the comparison is done on unsigned values; otherwise the sum is
// converted to int first so that it can be compared with a negative Off2.
297 return (Off1 >= 0) ? Off1+S1MO.getSize() == unsigned(Off2)
298 : int(Off1+S1MO.getSize()) == Off2;
302 /// Given a sequence of adjacent stores, and a maximum size of a single wide
303 /// store, pick a group of stores that can be replaced by a single store
304 /// of size not exceeding MaxSize. The selected sequence will be recorded
305 /// in OG ("old group" of instructions).
306 /// OG should be empty on entry, and should be left empty if the function
/// does not find a group of stores that can be widened.
/// On success TotalSize receives the combined size of the selected stores,
/// which is a power of 2.
308 bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
309 InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize,
311 assert(Begin != End && "No instructions to analyze");
312 assert(OG.empty() && "Old group not empty on entry");
// A single store has nothing to be merged with.
314 if (std::distance(Begin, End) <= 1)
317 MachineInstr *FirstMI = *Begin;
318 assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
319 const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI);
320 unsigned Alignment = FirstMMO.getAlignment();
321 unsigned SizeAccum = FirstMMO.getSize();
322 unsigned FirstOffset = getStoreOffset(FirstMI);
324 // The initial value of SizeAccum should always be a power of 2.
325 assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");
327 // If the size of the first store equals to or exceeds the limit, do nothing.
328 if (SizeAccum >= MaxSize)
331 // If the size of the first store is greater than or equal to the alignment
332 // of the address stored to, then the store cannot be made any wider.
333 if (SizeAccum >= Alignment)
336 // The offset of a store will put restrictions on how wide the store can be.
337 // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0.
338 // If the first store already exhausts the offset limits, quit. Test this
339 // by checking if the next wider size would exceed the limit.
340 if ((2*SizeAccum-1) & FirstOffset)
343 OG.push_back(FirstMI);
344 MachineInstr *S1 = FirstMI, *S2 = *(Begin+1);
345 InstrGroup::iterator I = Begin+1;
347 // Pow2Num will be the largest number of elements in OG such that the sum
348 // of sizes of stores 0...Pow2Num-1 will be a power of 2.
349 unsigned Pow2Num = 1;
350 unsigned Pow2Size = SizeAccum;
352 // Be greedy: keep accumulating stores as long as they are to adjacent
353 // memory locations, and as long as the total number of bytes stored
354 // does not exceed the limit (MaxSize).
355 // Keep track of when the total size covered is a power of 2, since
356 // this is a size a single store can cover.
359 // Stores are sorted, so if S1 and S2 are not adjacent, there won't be
360 // any other store to fill the "hole".
361 if (!storesAreAdjacent(S1, S2))
364 unsigned S2Size = getStoreTarget(S2).getSize();
// The accumulated size is capped by both MaxSize and the alignment of the
// first store.
365 if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
370 if (isPowerOf2_32(SizeAccum)) {
372 Pow2Size = SizeAccum;
// Same offset test as for the first store, applied to the new power-of-2
// size.
374 if ((2*Pow2Size-1) & FirstOffset)
381 // The stores don't add up to anything that can be widened. Clean up.
387 // Only leave the stores being widened.
389 TotalSize = Pow2Size;
394 /// Given an "old group" OG of stores, create a "new group" NG of instructions
395 /// to replace them. Ideally, NG would only have a single instruction in it,
396 /// but that may only be possible for store-immediate.
/// TotalSize is the size in bytes of the wide store to create; only 2 and 4
/// are accepted by the opcode selection below.
397 bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
398 unsigned TotalSize) {
399 // XXX Current limitations:
400 // - only expect stores of immediate values in OG,
401 // - only handle a TotalSize of up to 4.
406 unsigned Acc = 0; // Value accumulator.
409 for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) {
410 MachineInstr *MI = *I;
411 const MachineMemOperand &MMO = getStoreTarget(MI);
412 MachineOperand &SO = MI->getOperand(2); // Source.
413 assert(SO.isImm() && "Expecting an immediate operand");
// Truncate the stored immediate to the width of this store and move it
// to its bit position (Shift) within the wide value.
415 unsigned NBits = MMO.getSize()*8;
416 unsigned Mask = (0xFFFFFFFFU >> (32-NBits));
417 unsigned Val = (SO.getImm() & Mask) << Shift;
// The new instructions take the base register and offset from the first
// store in OG, and the debug location from the last one.
423 MachineInstr *FirstSt = OG.front();
424 DebugLoc DL = OG.back()->getDebugLoc();
// Memory operand for the wide store: same pointer info, flags and alignment
// as the first store's, but with size TotalSize.
425 const MachineMemOperand &OldM = getStoreTarget(FirstSt);
426 MachineMemOperand *NewM =
427 MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
428 TotalSize, OldM.getAlignment(),
432 // Create mem[hw] = #Acc
433 unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io :
434 (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0;
435 assert(WOpc && "Unexpected size");
// For a halfword store the accumulated value is reinterpreted as a signed
// 16-bit quantity.
437 int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc);
438 const MCInstrDesc &StD = TII->get(WOpc);
439 MachineOperand &MR = FirstSt->getOperand(0);
440 int64_t Off = FirstSt->getOperand(1).getImm();
// The kill state of the base register is carried over from the first store.
441 MachineInstr *StI = BuildMI(*MF, DL, StD)
442 .addReg(MR.getReg(), getKillRegState(MR.isKill()))
445 StI->addMemOperand(*MF, NewM);
448 // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
449 const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
// The new virtual register uses the register class of A2_tfrsi's operand 0.
450 const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
451 unsigned VReg = MF->getRegInfo().createVirtualRegister(RC);
452 MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg)
456 unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io :
457 (TotalSize == 4) ? Hexagon::S2_storeri_io : 0;
458 assert(WOpc && "Unexpected size");
460 const MCInstrDesc &StD = TII->get(WOpc);
461 MachineOperand &MR = FirstSt->getOperand(0);
462 int64_t Off = FirstSt->getOperand(1).getImm();
463 MachineInstr *StI = BuildMI(*MF, DL, StD)
464 .addReg(MR.getReg(), getKillRegState(MR.isKill()))
// The store is marked as killing the temporary register.
466 .addReg(VReg, RegState::Kill);
467 StI->addMemOperand(*MF, NewM);
475 // Replace instructions from the old group OG with instructions from the
476 // new group NG. Conceptually, remove all instructions in OG, and then
477 // insert all instructions in NG, starting at where the first instruction
478 // from OG was (in the order in which they appeared in the basic block).
479 // (The ordering in OG does not have to match the order in the basic block.)
480 bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) {
482 dbgs() << "Replacing:\n";
// The parent block is taken from the last store in OG. InsertAt starts out
// as end(), which doubles as the "not found" value checked by the assertion
// below.
490 MachineBasicBlock *MBB = OG.back()->getParent();
491 MachineBasicBlock::iterator InsertAt = MBB->end();
493 // Need to establish the insertion point. The best one is right before
494 // the first store in the OG, but in the order in which the stores occur
495 // in the program list. Since the ordering in OG does not correspond
496 // to the order in the program list, we need to do some work to find
497 // the insertion point.
499 // Create a set of all instructions in OG (for quick lookup).
500 SmallPtrSet<MachineInstr*, 4> InstrSet;
504 // Traverse the block, until we hit an instruction from OG.
505 for (auto &I : *MBB) {
506 if (InstrSet.count(&I)) {
512 assert((InsertAt != MBB->end()) && "Cannot locate any store from the group");
514 bool AtBBStart = false;
516 // InsertAt points at the first instruction that will be removed. We need
517 // to move it out of the way, so it remains valid after removing all the
518 // old stores, and so we are able to recover it back to the proper insertion
// point afterwards.
520 if (InsertAt != MBB->begin())
// Remove the old stores from the block.
526 I->eraseFromParent();
531 InsertAt = MBB->begin();
// Insert the new instructions at the recovered insertion point.
534 MBB->insert(InsertAt, I);
540 // Break up the group into smaller groups, each of which can be replaced by
541 // a single wide store. Widen each such smaller group and replace the old
542 // instructions with the widened ones.
543 bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) {
544 bool Changed = false;
545 InstrGroup::iterator I = Group.begin(), E = Group.end();
546 InstrGroup OG, NG; // Old and new groups.
547 unsigned CollectedSize;
// The three steps are chained with &&, so a later step only runs when the
// previous one succeeded. I is advanced past the current starting store
// whether or not the attempt succeeds.
553 bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) &&
554 createWideStores(OG, NG, CollectedSize) &&
555 replaceStores(OG, NG);
// Sanity checks on the selected group: it has at least two stores and does
// not extend past the end of Group.
559 assert(OG.size() > 1 && "Created invalid group");
560 assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements");
570 // Process a single basic block: create the store groups, and replace them
571 // with the widened stores, if possible. Processing of each basic block
572 // is independent from processing of any other basic block. This transfor-
573 // mation could be stopped after having processed any basic block without
574 // any ill effects (other than not having performed widening in the unpro-
575 // cessed blocks). Also, the basic blocks can be processed in any order.
576 bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
578 bool Changed = false;
580 createStoreGroups(MBB, SGs);
// Orders stores by their immediate offset.
582 auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool {
583 return getStoreOffset(A) < getStoreOffset(B);
585 for (auto &G : SGs) {
586 assert(G.size() > 1 && "Store group with fewer than 2 elements");
// Sort each group by offset before processing; selectStores relies on the
// stores being sorted.
587 std::sort(G.begin(), G.end(), Less);
589 Changed |= processStoreGroup(G);
// Pass entry point. Caches the subtarget's instruction and register info,
// the function's register info and the alias-analysis results, then
// processes the function's basic blocks and accumulates their results in
// Changed.
596 bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) {
598 auto &ST = MFn.getSubtarget<HexagonSubtarget>();
599 TII = ST.getInstrInfo();
600 TRI = ST.getRegisterInfo();
601 MRI = &MFn.getRegInfo();
602 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
604 bool Changed = false;
607 Changed |= processBasicBlock(B);
// Factory function: returns a newly allocated instance of the pass.
613 FunctionPass *llvm::createHexagonStoreWidening() {
614 return new HexagonStoreWidening();