1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
12 //===----------------------------------------------------------------------===//
15 #include "AArch64InstrInfo.h"
16 #include "AArch64MachineFunctionInfo.h"
17 #include "AArch64TargetMachine.h"
18 #include "MCTargetDesc/AArch64MCTargetDesc.h"
19 #include "Utils/AArch64BaseInfo.h"
20 #include "llvm/CodeGen/MachineConstantPool.h"
21 #include "llvm/CodeGen/MachineDominators.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/Support/TargetRegistry.h"
32 #define GET_INSTRINFO_CTOR_DTOR
33 #include "AArch64GenInstrInfo.inc"
/// Constructs the AArch64 instruction info for the given subtarget, handing
/// the ADJCALLSTACKDOWN / ADJCALLSTACKUP pseudo opcodes to the generated
/// AArch64GenInstrInfo base class.
37 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
38 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
/// Emits, before I, the instruction(s) that copy physical register SrcReg into
/// DestReg. The opcode is chosen from the registers involved:
///   - XSP / WSP on either side: ADDxxi_lsl0_s / ADDwwi_lsl0_s
///   - NZCV as destination or source: MSRix / MRSxi, with a GPR64 on the other
///     side (asserted)
///   - GPR64 / GPR32 destination: ORR (ORRxxx_lsl / ORRwww_lsl) using the zero
///     register for a same-class source, otherwise FMOVxd / FMOVws from an
///     FPR64 / FPR32 source (asserted)
///   - FPR32 / FPR64 destination: FMOVss / FMOVdd for a same-class source,
///     otherwise FMOVsw / FMOVdx from a GPR32 / GPR64 source (asserted)
///   - FPR128 destination: ORRvvv_16B when the subtarget has NEON, otherwise a
///     pre-indexed store followed by a post-indexed load through XSP
/// NOTE(review): the CopyPhysRegTuple call near the end looks like the
/// fallback for all remaining (tuple) register classes -- confirm.
41 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
42 MachineBasicBlock::iterator I, DebugLoc DL,
43 unsigned DestReg, unsigned SrcReg,
47 if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) {
48 // E.g. ADD xDst, xsp, #0 (, lsl #0)
49 BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg)
53 } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
54 // E.g. ADD wDST, wsp, #0 (, lsl #0)
55 BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg)
59 } else if (DestReg == AArch64::NZCV) {
60 assert(AArch64::GPR64RegClass.contains(SrcReg));
61 // E.g. MSR NZCV, xDST
62 BuildMI(MBB, I, DL, get(AArch64::MSRix))
63 .addImm(A64SysReg::NZCV)
65 } else if (SrcReg == AArch64::NZCV) {
66 assert(AArch64::GPR64RegClass.contains(DestReg));
67 // E.g. MRS xDST, NZCV
68 BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
69 .addImm(A64SysReg::NZCV);
70 } else if (AArch64::GPR64RegClass.contains(DestReg)) {
71 if(AArch64::GPR64RegClass.contains(SrcReg)){
// GPR64 -> GPR64: only record the opcode and zero register here; the ORR
// itself is built by the shared BuildMI(get(Opc)) at the end.
72 Opc = AArch64::ORRxxx_lsl;
73 ZeroReg = AArch64::XZR;
75 assert(AArch64::FPR64RegClass.contains(SrcReg));
76 BuildMI(MBB, I, DL, get(AArch64::FMOVxd), DestReg)
80 } else if (AArch64::GPR32RegClass.contains(DestReg)) {
81 if(AArch64::GPR32RegClass.contains(SrcReg)){
// GPR32 -> GPR32: same deferred-ORR scheme as the 64-bit case.
82 Opc = AArch64::ORRwww_lsl;
83 ZeroReg = AArch64::WZR;
85 assert(AArch64::FPR32RegClass.contains(SrcReg));
86 BuildMI(MBB, I, DL, get(AArch64::FMOVws), DestReg)
90 } else if (AArch64::FPR32RegClass.contains(DestReg)) {
91 if(AArch64::FPR32RegClass.contains(SrcReg)){
92 BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
97 assert(AArch64::GPR32RegClass.contains(SrcReg));
98 BuildMI(MBB, I, DL, get(AArch64::FMOVsw), DestReg)
102 } else if (AArch64::FPR64RegClass.contains(DestReg)) {
103 if(AArch64::FPR64RegClass.contains(SrcReg)){
104 BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
109 assert(AArch64::GPR64RegClass.contains(SrcReg));
110 BuildMI(MBB, I, DL, get(AArch64::FMOVdx), DestReg)
114 } else if (AArch64::FPR128RegClass.contains(DestReg)) {
115 assert(AArch64::FPR128RegClass.contains(SrcReg));
117 // If NEON is enabled, we use ORR to implement this copy.
118 // If NEON isn't available, emit STR and LDR to handle this.
119 if(getSubTarget().hasNEON()) {
120 BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg)
125 BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
127 .addReg(AArch64::XSP)
// 0x1ff & -16 == 0x1f0, i.e. -16 masked down to nine bits.
128 .addImm(0x1ff & -16);
130 BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
131 .addReg(AArch64::XSP, RegState::Define)
132 .addReg(AArch64::XSP)
137 CopyPhysRegTuple(MBB, I, DL, DestReg, SrcReg);
141 // E.g. ORR xDst, xzr, xSrc, lsl #0
142 BuildMI(MBB, I, DL, get(Opc), DestReg)
/// Copies a register tuple (D or Q pair / triple / quad) from SrcReg to
/// DestReg one sub-register at a time, using ORRvvv_8B for D registers and
/// ORRvvv_16B for Q registers. Both registers must belong to the same tuple
/// class; any other class reaches llvm_unreachable.
/// NOTE(review): the visible BuildMI call takes its debug location from
/// I->getDebugLoc() and the DL parameter is not used in the visible code --
/// confirm that this is intended.
148 void AArch64InstrInfo::CopyPhysRegTuple(MachineBasicBlock &MBB,
149 MachineBasicBlock::iterator I,
150 DebugLoc DL, unsigned DestReg,
151 unsigned SrcReg) const {
154 if (AArch64::DPairRegClass.contains(DestReg, SrcReg)) {
157 } else if (AArch64::DTripleRegClass.contains(DestReg, SrcReg)) {
160 } else if (AArch64::DQuadRegClass.contains(DestReg, SrcReg)) {
163 } else if (AArch64::QPairRegClass.contains(DestReg, SrcReg)) {
166 } else if (AArch64::QTripleRegClass.contains(DestReg, SrcReg)) {
169 } else if (AArch64::QQuadRegClass.contains(DestReg, SrcReg)) {
173 llvm_unreachable("Unknown register class");
// Index of the first sub-register of the tuple (Q or D flavour).
175 unsigned BeginIdx = IsQRegs ? AArch64::qsub_0 : AArch64::dsub_0;
177 const TargetRegisterInfo *TRI = &getRegisterInfo();
178 // Copy register tuples backward when the first Dest reg overlaps
180 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
// Start from the last sub-register instead of the first.
// NOTE(review): Spacing is presumably made negative alongside this so the
// loop below walks downwards -- confirm.
181 BeginIdx = BeginIdx + (SubRegs - 1);
185 unsigned Opc = IsQRegs ? AArch64::ORRvvv_16B : AArch64::ORRvvv_8B;
186 for (unsigned i = 0; i != SubRegs; ++i) {
187 unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
188 unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
189 assert(Dst && Src && "Bad sub-register");
190 BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
197 /// Does the Opcode represent a conditional branch that we can remove and re-add
198 /// at the end of a basic block?
/// Returns true for Bcc and for the w and x forms of CBZ, CBNZ, TBZ and TBNZ.
199 static bool isCondBranch(unsigned Opc) {
200 return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx ||
201 Opc == AArch64::CBNZw || Opc == AArch64::CBNZx ||
202 Opc == AArch64::TBZwii || Opc == AArch64::TBZxii ||
203 Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii;
206 /// Takes apart a given conditional branch MachineInstr (see isCondBranch),
207 /// setting TBB to the destination basic block and populating the Cond vector
208 /// with data necessary to recreate the conditional branch at a later
209 /// date. First element will be the opcode, and subsequent ones define the
210 /// conditions being branched on in an instruction-specific manner.
///
/// Resulting layout of Cond:
///   - single-predicate branches: { opcode, operand 0 }, target is operand 1
///   - TBZ / TBNZ:                { opcode, operand 0, operand 1 }, target is
///                                operand 2
/// Any other opcode reaches llvm_unreachable.
211 static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB,
212 SmallVectorImpl<MachineOperand> &Cond) {
213 switch(I->getOpcode()) {
219 // These instructions just have one predicate operand in position 0 (either
220 // a condition code or a register being compared).
221 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
222 Cond.push_back(I->getOperand(0));
223 TBB = I->getOperand(1).getMBB();
225 case AArch64::TBZwii:
226 case AArch64::TBZxii:
227 case AArch64::TBNZwii:
228 case AArch64::TBNZxii:
229 // These have two predicate operands: a register and a bit position.
230 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
231 Cond.push_back(I->getOperand(0));
232 Cond.push_back(I->getOperand(1));
233 TBB = I->getOperand(2).getMBB();
236 llvm_unreachable("Unknown conditional branch to classify");
/// Analyzes the terminators at the end of MBB and describes them through the
/// out-parameters:
///   - TBB:  destination of the (conditional or sole unconditional) branch
///   - FBB:  destination of a trailing unconditional Bimm that follows a
///           conditional branch
///   - Cond: opcode plus predicate operands of the conditional branch, in the
///           layout produced by classifyCondBranch
/// The visible failure path returns true when the terminator cannot be
/// handled (e.g. an indirect branch). When AllowModify is set, redundant
/// trailing unconditional branches are erased from the block.
242 AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
243 MachineBasicBlock *&FBB,
244 SmallVectorImpl<MachineOperand> &Cond,
245 bool AllowModify) const {
246 // If the block has no terminators, it just falls into the block after it.
247 MachineBasicBlock::iterator I = MBB.end();
248 if (I == MBB.begin())
// Step backwards over trailing debug values to reach the last real
// instruction.
251 while (I->isDebugValue()) {
252 if (I == MBB.begin())
256 if (!isUnpredicatedTerminator(I))
259 // Get the last instruction in the block.
260 MachineInstr *LastInst = I;
262 // If there is only one terminator instruction, process it.
263 unsigned LastOpc = LastInst->getOpcode();
264 if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
265 if (LastOpc == AArch64::Bimm) {
266 TBB = LastInst->getOperand(0).getMBB();
269 if (isCondBranch(LastOpc)) {
270 classifyCondBranch(LastInst, TBB, Cond);
273 return true; // Can't handle indirect branch.
276 // Get the instruction before it if it is a terminator.
277 MachineInstr *SecondLastInst = I;
278 unsigned SecondLastOpc = SecondLastInst->getOpcode();
280 // If AllowModify is true and the block ends with two or more unconditional
281 // branches, delete all but the first unconditional branch.
282 if (AllowModify && LastOpc == AArch64::Bimm) {
283 while (SecondLastOpc == AArch64::Bimm) {
284 LastInst->eraseFromParent();
285 LastInst = SecondLastInst;
286 LastOpc = LastInst->getOpcode();
287 if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
288 // Return now that the only terminator is an unconditional branch.
289 TBB = LastInst->getOperand(0).getMBB();
293 SecondLastOpc = SecondLastInst->getOpcode();
298 // If there are three terminators, we don't know what sort of block this is.
299 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
302 // If the block ends with a B and a Bcc, handle it.
303 if (LastOpc == AArch64::Bimm) {
304 if (SecondLastOpc == AArch64::Bcc) {
// Bcc is decoded inline here: operand 0 is pushed as the condition and
// operand 1 is the taken block.
305 TBB = SecondLastInst->getOperand(1).getMBB();
306 Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
307 Cond.push_back(SecondLastInst->getOperand(0));
308 FBB = LastInst->getOperand(0).getMBB();
310 } else if (isCondBranch(SecondLastOpc)) {
311 classifyCondBranch(SecondLastInst, TBB, Cond);
312 FBB = LastInst->getOperand(0).getMBB();
317 // If the block ends with two unconditional branches, handle it. The second
318 // one is not executed, so remove it.
319 if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
320 TBB = SecondLastInst->getOperand(0).getMBB();
323 I->eraseFromParent();
327 // Otherwise, can't handle this.
/// Rewrites Cond in place so that it describes the opposite branch condition.
/// Cond[0] holds the branch opcode (see classifyCondBranch). A condition code
/// held in Cond[1] is inverted with A64InvertCondCode, and compare/test-and-
/// branch opcodes are swapped with their counterpart (TBZ <-> TBNZ visibly;
/// NOTE(review): the CBZ <-> CBNZ case labels are not visible here -- confirm).
/// Any other opcode reaches llvm_unreachable.
331 bool AArch64InstrInfo::ReverseBranchCondition(
332 SmallVectorImpl<MachineOperand> &Cond) const {
333 switch (Cond[0].getImm()) {
335 A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm());
336 CC = A64InvertCondCode(CC);
341 Cond[0].setImm(AArch64::CBNZw);
344 Cond[0].setImm(AArch64::CBNZx);
347 Cond[0].setImm(AArch64::CBZw);
350 Cond[0].setImm(AArch64::CBZx);
352 case AArch64::TBZwii:
353 Cond[0].setImm(AArch64::TBNZwii);
355 case AArch64::TBZxii:
356 Cond[0].setImm(AArch64::TBNZxii);
358 case AArch64::TBNZwii:
359 Cond[0].setImm(AArch64::TBZwii);
361 case AArch64::TBNZxii:
362 Cond[0].setImm(AArch64::TBZxii);
365 llvm_unreachable("Unknown branch type");
/// Builds the branch sequence described by TBB / FBB / Cond in MBB:
///   - no FBB and empty Cond: a single unconditional Bimm to TBB
///   - no FBB, non-empty Cond: a conditional branch whose opcode is Cond[0]
///     and whose leading operands are Cond[1..]
///   - otherwise: the same conditional branch followed by a Bimm to FBB
371 AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
372 MachineBasicBlock *FBB,
373 const SmallVectorImpl<MachineOperand> &Cond,
375 if (FBB == 0 && Cond.empty()) {
376 BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB);
378 } else if (FBB == 0) {
// Cond[0] is the opcode; the remaining entries are re-attached as operands.
379 MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
380 for (int i = 1, e = Cond.size(); i != e; ++i)
381 MIB.addOperand(Cond[i]);
// Two-way branch: conditional part first, then the unconditional Bimm to FBB.
386 MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
387 for (int i = 1, e = Cond.size(); i != e; ++i)
388 MIB.addOperand(Cond[i]);
391 BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB);
/// Erases the branch instructions at the end of MBB and returns how many were
/// removed. An empty block yields 0. The last instruction is erased only if
/// it is a Bimm or a conditional branch (isCondBranch); the instruction
/// before it is erased only if it is a conditional branch.
395 unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
396 MachineBasicBlock::iterator I = MBB.end();
397 if (I == MBB.begin()) return 0;
// Step backwards over trailing debug values.
399 while (I->isDebugValue()) {
400 if (I == MBB.begin())
404 if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode()))
407 // Remove the branch.
408 I->eraseFromParent();
// Nothing precedes the erased branch, so exactly one was removed.
412 if (I == MBB.begin()) return 1;
414 if (!isCondBranch(I->getOpcode()))
417 // Remove the branch.
418 I->eraseFromParent();
/// Expands pseudo instructions after register allocation. The visible case is
/// TLSDESC_BLRx: a TLSDESCCALL carrying the pseudo's operand 1 is inserted in
/// front of it, the pseudo itself is re-described as BLRx, and the two
/// instructions are bundled with finalizeBundle.
423 AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const {
424 MachineInstr &MI = *MBBI;
425 MachineBasicBlock &MBB = *MI.getParent();
427 unsigned Opcode = MI.getOpcode();
429 case AArch64::TLSDESC_BLRx: {
430 MachineInstr *NewMI =
431 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL))
432 .addOperand(MI.getOperand(1));
// Changing the descriptor in place turns the pseudo into a real BLRx.
433 MI.setDesc(get(AArch64::BLRx));
// Bundle from the new TLSDESCCALL up to the instruction after MI.
435 llvm::finalizeBundle(MBB, NewMI, *++MBBI);
/// Emits, before MBBI, a store of SrcReg to the stack slot FrameIdx. The store
/// opcode is derived from the register class RC:
///   - integer classes (i32 / i64):      LS32_STR / LS64_STR by size
///   - FP classes (f32 / f64 / f128):    LSFP32_STR / LSFP64_STR / LSFP128_STR
///   - D / Q register tuples:            ST1x{2,3,4}_8B / ST1x{2,3,4}_16B
/// Unknown sizes or classes reach llvm_unreachable. A MachineMemOperand
/// describing the fixed-stack store is created for the access.
446 AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
447 MachineBasicBlock::iterator MBBI,
448 unsigned SrcReg, bool isKill,
450 const TargetRegisterClass *RC,
451 const TargetRegisterInfo *TRI) const {
452 DebugLoc DL = MBB.findDebugLoc(MBBI);
453 MachineFunction &MF = *MBB.getParent();
454 MachineFrameInfo &MFI = *MF.getFrameInfo();
455 unsigned Align = MFI.getObjectAlignment(FrameIdx);
457 MachineMemOperand *MMO
458 = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
459 MachineMemOperand::MOStore,
460 MFI.getObjectSize(FrameIdx),
463 unsigned StoreOp = 0;
464 if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
465 switch(RC->getSize()) {
466 case 4: StoreOp = AArch64::LS32_STR; break;
467 case 8: StoreOp = AArch64::LS64_STR; break;
469 llvm_unreachable("Unknown size for regclass");
471 } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
472 RC->hasType(MVT::f128)) {
473 switch (RC->getSize()) {
474 case 4: StoreOp = AArch64::LSFP32_STR; break;
475 case 8: StoreOp = AArch64::LSFP64_STR; break;
476 case 16: StoreOp = AArch64::LSFP128_STR; break;
478 llvm_unreachable("Unknown size for regclass");
480 } else { // For a super-register class that has more than one sub-register
481 if (AArch64::DPairRegClass.hasSubClassEq(RC))
482 StoreOp = AArch64::ST1x2_8B;
483 else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
484 StoreOp = AArch64::ST1x3_8B;
485 else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
486 StoreOp = AArch64::ST1x4_8B;
487 else if (AArch64::QPairRegClass.hasSubClassEq(RC))
488 StoreOp = AArch64::ST1x2_16B;
489 else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
490 StoreOp = AArch64::ST1x3_16B;
491 else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
492 StoreOp = AArch64::ST1x4_16B;
494 llvm_unreachable("Unknown reg class");
496 MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
497 // Vector store has different operands from other store instructions.
// (frame index first, then the stored register)
498 NewMI.addFrameIndex(FrameIdx)
499 .addReg(SrcReg, getKillRegState(isKill))
// Scalar stores: stored register first, then the frame index.
504 MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
505 NewMI.addReg(SrcReg, getKillRegState(isKill))
506 .addFrameIndex(FrameIdx)
/// Emits, before MBBI, a load of DestReg from the stack slot FrameIdx. The
/// load opcode is derived from the register class RC:
///   - integer classes (i32 / i64):      LS32_LDR / LS64_LDR by size
///   - FP classes (f32 / f64 / f128):    LSFP32_LDR / LSFP64_LDR / LSFP128_LDR
///   - D / Q register tuples:            LD1x{2,3,4}_8B / LD1x{2,3,4}_16B
/// Unknown sizes or classes reach llvm_unreachable. A MachineMemOperand
/// describing the fixed-stack load is created for the access.
513 AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
514 MachineBasicBlock::iterator MBBI,
515 unsigned DestReg, int FrameIdx,
516 const TargetRegisterClass *RC,
517 const TargetRegisterInfo *TRI) const {
518 DebugLoc DL = MBB.findDebugLoc(MBBI);
519 MachineFunction &MF = *MBB.getParent();
520 MachineFrameInfo &MFI = *MF.getFrameInfo();
521 unsigned Align = MFI.getObjectAlignment(FrameIdx);
523 MachineMemOperand *MMO
524 = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
525 MachineMemOperand::MOLoad,
526 MFI.getObjectSize(FrameIdx),
530 if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
531 switch(RC->getSize()) {
532 case 4: LoadOp = AArch64::LS32_LDR; break;
533 case 8: LoadOp = AArch64::LS64_LDR; break;
535 llvm_unreachable("Unknown size for regclass");
537 } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
538 RC->hasType(MVT::f128)) {
539 switch (RC->getSize()) {
540 case 4: LoadOp = AArch64::LSFP32_LDR; break;
541 case 8: LoadOp = AArch64::LSFP64_LDR; break;
542 case 16: LoadOp = AArch64::LSFP128_LDR; break;
544 llvm_unreachable("Unknown size for regclass");
546 } else { // For a super-register class that has more than one sub-register
547 if (AArch64::DPairRegClass.hasSubClassEq(RC))
548 LoadOp = AArch64::LD1x2_8B;
549 else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
550 LoadOp = AArch64::LD1x3_8B;
551 else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
552 LoadOp = AArch64::LD1x4_8B;
553 else if (AArch64::QPairRegClass.hasSubClassEq(RC))
554 LoadOp = AArch64::LD1x2_16B;
555 else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
556 LoadOp = AArch64::LD1x3_16B;
557 else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
558 LoadOp = AArch64::LD1x4_16B;
560 llvm_unreachable("Unknown reg class");
562 MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
563 // Vector load has different operands from other load instructions.
564 NewMI.addFrameIndex(FrameIdx)
// Scalar loads: DestReg is the defined register, followed by the frame index.
569 MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
570 NewMI.addFrameIndex(FrameIdx)
/// Walks every instruction in MF and, for each one that has a frame-index
/// operand, lowers Limit to the largest offset that instruction can address:
/// 0xfff for ADDxxi_lsl0_s, otherwise the MaxOffset reported by
/// getAddressConstraints. Limit starts at (1 << 16) - 1.
/// NOTE(review): the return statement is not visible here; presumably Limit
/// is returned -- confirm.
575 unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const {
576 unsigned Limit = (1 << 16) - 1;
577 for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
578 for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
580 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
// Only frame-index operands constrain the limit.
581 if (!I->getOperand(i).isFI()) continue;
583 // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff
584 // is the largest offset guaranteed to fit in the immediate offset.
585 if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) {
586 Limit = std::min(Limit, 0xfffu);
590 int AccessScale, MinOffset, MaxOffset;
591 getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset);
592 Limit = std::min(Limit, static_cast<unsigned>(MaxOffset));
594 break; // At most one FI per instruction
/// Reports, through the out-parameters, the addressing limits of the
/// load/store instruction MI: AccessScale, MinOffset and MaxOffset.
/// In the visible cases, single-register loads/stores and the LD1 / ST1
/// tuple forms get MaxOffset = 0xfff * AccessScale, while the load/store-pair
/// forms get the range [-0x40, 0x3f] scaled by AccessScale.
/// Opcodes that are not listed reach llvm_unreachable.
/// NOTE(review): "Unkown" in the message below is a typo for "Unknown"; it is
/// a runtime string and is therefore left unchanged here.
601 void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
602 int &AccessScale, int &MinOffset,
603 int &MaxOffset) const {
604 switch (MI.getOpcode()) {
605 default: llvm_unreachable("Unkown load/store kind");
606 case TargetOpcode::DBG_VALUE:
// Byte-sized accesses.
611 case AArch64::LS8_LDR: case AArch64::LS8_STR:
612 case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
613 case AArch64::LDRSBw:
614 case AArch64::LDRSBx:
// Half-word accesses.
619 case AArch64::LS16_LDR: case AArch64::LS16_STR:
620 case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
621 case AArch64::LDRSHw:
622 case AArch64::LDRSHx:
625 MaxOffset = 0xfff * AccessScale;
// Word accesses.
627 case AArch64::LS32_LDR: case AArch64::LS32_STR:
628 case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
629 case AArch64::LDRSWx:
630 case AArch64::LDPSWx:
633 MaxOffset = 0xfff * AccessScale;
// Double-word accesses.
635 case AArch64::LS64_LDR: case AArch64::LS64_STR:
636 case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
640 MaxOffset = 0xfff * AccessScale;
// Quad-word FP accesses.
642 case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
645 MaxOffset = 0xfff * AccessScale;
// Load/store pair forms: offsets span [-0x40, 0x3f] times AccessScale.
647 case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
648 case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
650 MinOffset = -0x40 * AccessScale;
651 MaxOffset = 0x3f * AccessScale;
653 case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
654 case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
656 MinOffset = -0x40 * AccessScale;
657 MaxOffset = 0x3f * AccessScale;
659 case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
661 MinOffset = -0x40 * AccessScale;
662 MaxOffset = 0x3f * AccessScale;
// LD1 / ST1 register-tuple forms (the ones used for tuple spills and reloads).
664 case AArch64::LD1x2_8B: case AArch64::ST1x2_8B:
667 MaxOffset = 0xfff * AccessScale;
669 case AArch64::LD1x3_8B: case AArch64::ST1x3_8B:
672 MaxOffset = 0xfff * AccessScale;
// The four-D and two-Q tuples share one case.
674 case AArch64::LD1x4_8B: case AArch64::ST1x4_8B:
675 case AArch64::LD1x2_16B: case AArch64::ST1x2_16B:
678 MaxOffset = 0xfff * AccessScale;
680 case AArch64::LD1x3_16B: case AArch64::ST1x3_16B:
683 MaxOffset = 0xfff * AccessScale;
685 case AArch64::LD1x4_16B: case AArch64::ST1x4_16B:
688 MaxOffset = 0xfff * AccessScale;
/// Returns the size in bytes of MI. The size recorded in the instruction
/// descriptor is used where the visible code returns MCID.getSize(); inline
/// assembly is measured with getInlineAsmLength, bundles with
/// getInstBundleLength, and the remaining listed pseudo opcodes are handled
/// in the switch. Anything else reaches llvm_unreachable.
/// NOTE(review): the condition guarding the MCID.getSize() return and the
/// values returned for the pseudo opcodes are not visible here -- confirm.
693 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
694 const MCInstrDesc &MCID = MI.getDesc();
695 const MachineBasicBlock &MBB = *MI.getParent();
696 const MachineFunction &MF = *MBB.getParent();
697 const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();
700 return MCID.getSize();
// Inline asm length is computed from the asm string in operand 0.
702 if (MI.getOpcode() == AArch64::INLINEASM)
703 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
708 switch (MI.getOpcode()) {
709 case TargetOpcode::BUNDLE:
710 return getInstBundleLength(MI);
711 case TargetOpcode::IMPLICIT_DEF:
712 case TargetOpcode::KILL:
713 case TargetOpcode::PROLOG_LABEL:
714 case TargetOpcode::EH_LABEL:
715 case TargetOpcode::DBG_VALUE:
717 case AArch64::TLSDESCCALL:
720 llvm_unreachable("Unknown instruction class");
/// Sums getInstSizeInBytes over the instructions that follow the bundle
/// header MI and are marked as being inside a bundle. Nested bundles are not
/// expected (asserted).
724 unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
726 MachineBasicBlock::const_instr_iterator I = MI;
727 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
// Pre-increment skips the bundle header itself.
728 while (++I != E && I->isInsideBundle()) {
729 assert(!I->isBundle() && "No nested bundle!");
730 Size += getInstSizeInBytes(*I);
/// Placeholder for rewriting a frame-index operand of MI in terms of FrameReg
/// and Offset. The visible body only queries the object offset and then hits
/// llvm_unreachable, i.e. the function is not implemented.
735 bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
736 unsigned FrameReg, int &Offset,
737 const AArch64InstrInfo &TII) {
738 MachineBasicBlock &MBB = *MI.getParent();
739 MachineFunction &MF = *MBB.getParent();
740 MachineFrameInfo &MFI = *MF.getFrameInfo();
// NOTE(review): the result of this call appears to be discarded.
742 MFI.getObjectOffset(FrameRegIdx);
743 llvm_unreachable("Unimplemented rewriteFrameIndex");
/// Emits, before MBBI, instructions computing DstReg = SrcReg + NumBytes
/// (NumBytes may be negative). Two strategies are used:
///   - If the magnitude of NumBytes has bits set above bit 23, it is
///     materialised into ScratchReg with MOVZxii / MOVKxii and then added or
///     subtracted with ADDxxx_uxtx / SUBxxx_uxtx.
///   - Otherwise at most two ADD/SUB (immediate) instructions are emitted:
///     one for the low 12 bits and one for bits 12-23 (the lsl #12 form).
/// MIFlags is attached to the emitted instructions where visible.
/// NOTE(review): the NumBytes == 0 && DstReg == SrcReg case presumably
/// returns without emitting anything -- the statement is not visible here.
746 void llvm::emitRegUpdate(MachineBasicBlock &MBB,
747 MachineBasicBlock::iterator MBBI,
748 DebugLoc dl, const TargetInstrInfo &TII,
749 unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
750 int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
751 if (NumBytes == 0 && DstReg == SrcReg)
753 else if (abs64(NumBytes) & ~0xffffff) {
754 // Generically, we have to materialize the offset into a temporary register
755 // and subtract it. There are a couple of ways this could be done, for now
756 // we'll use a movz/movk or movn/movk sequence.
757 uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes));
// MOVZ sets the lowest 16-bit chunk (shift index 0).
758 BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg)
759 .addImm(0xffff & Bits).addImm(0)
760 .setMIFlags(MIFlags);
// The MOVK instructions below carry shift indices 1, 2 and 3 for the
// remaining 16-bit chunks.
764 BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
766 .addImm(0xffff & Bits).addImm(1)
767 .setMIFlags(MIFlags);
772 BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
774 .addImm(0xffff & Bits).addImm(2)
775 .setMIFlags(MIFlags);
780 BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
782 .addImm(0xffff & Bits).addImm(3)
783 .setMIFlags(MIFlags);
786 // ADD DST, SRC, xTMP (, lsl #0)
// The sign of NumBytes selects ADD versus SUB of the materialised magnitude.
787 unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx;
788 BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg)
789 .addReg(SrcReg, RegState::Kill)
790 .addReg(ScratchReg, RegState::Kill)
796 // Now we know that the adjustment can be done in at most two add/sub
797 // (immediate) instructions, which is always more efficient than a
798 // literal-pool load, or even a hypothetical movz/movk/add sequence
800 // Decide whether we're doing addition or subtraction
801 unsigned LowOp, HighOp;
803 LowOp = AArch64::ADDxxi_lsl0_s;
804 HighOp = AArch64::ADDxxi_lsl12_s;
806 LowOp = AArch64::SUBxxi_lsl0_s;
807 HighOp = AArch64::SUBxxi_lsl12_s;
// From here on NumBytes holds the magnitude; the direction lives in the opcodes.
808 NumBytes = abs64(NumBytes);
811 // If we're here, at the very least a move needs to be produced, which just
812 // happens to be materializable by an ADD.
813 if ((NumBytes & 0xfff) || NumBytes == 0) {
814 BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg)
815 .addReg(SrcReg, RegState::Kill)
816 .addImm(NumBytes & 0xfff)
819 // Next update should use the register we've just defined.
// Bits 12-23 are applied with the lsl #12 opcode.
823 if (NumBytes & 0xfff000) {
824 BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg)
825 .addReg(SrcReg, RegState::Kill)
826 .addImm(NumBytes >> 12)
/// Adjusts the stack pointer by NumBytes: a thin wrapper around emitRegUpdate
/// with XSP as both destination and source register.
/// NOTE(review): the visible call passes AArch64::X16 as the scratch register
/// and the ScratchReg parameter is not used in it -- confirm whether callers
/// rely on X16 or whether ScratchReg should be forwarded.
831 void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
832 DebugLoc dl, const TargetInstrInfo &TII,
833 unsigned ScratchReg, int64_t NumBytes,
834 MachineInstr::MIFlag MIFlags) {
835 emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16,
/// Machine function pass that cleans up repeated local-dynamic TLS base
/// address computations. It walks the machine dominator tree in pre-order.
/// Per the comments on VisitNode, the first TLSDESC_BLRx that refers to the
/// "_TLS_MODULE_BASE_" symbol has its result (X0) copied into a fresh virtual
/// register, and later such instructions in dominated blocks are replaced by
/// a copy from that register. Functions with fewer than two local-dynamic
/// TLS accesses are skipped.
841 struct LDTLSCleanup : public MachineFunctionPass {
843 LDTLSCleanup() : MachineFunctionPass(ID) {}
// Pass entry point: bails out early when there is nothing to fold, otherwise
// starts the dominator-tree walk at the root with no base register yet (0).
845 virtual bool runOnMachineFunction(MachineFunction &MF) {
846 AArch64MachineFunctionInfo* MFI
847 = MF.getInfo<AArch64MachineFunctionInfo>();
848 if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
849 // No point folding accesses if there isn't at least two.
853 MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
854 return VisitNode(DT->getRootNode(), 0);
857 // Visit the dominator subtree rooted at Node in pre-order.
858 // If TLSBaseAddrReg is non-null, then use that to replace any
859 // TLS_base_addr instructions. Otherwise, create the register
860 // when the first such instruction is seen, and then use it
861 // as we encounter more instructions.
862 bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
863 MachineBasicBlock *BB = Node->getBlock();
864 bool Changed = false;
866 // Traverse the current block.
867 for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
869 switch (I->getOpcode()) {
870 case AArch64::TLSDESC_BLRx:
871 // Make sure it's a local dynamic access.
872 if (!I->getOperand(1).isSymbol() ||
873 strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
877 I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
879 I = SetRegister(I, &TLSBaseAddrReg);
887 // Visit the children of this block in the dominator tree.
// TLSBaseAddrReg is passed by value, so a register created in this block is
// only visible to the blocks it dominates.
888 for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
890 Changed |= VisitNode(*I, TLSBaseAddrReg);
896 // Replace the TLS_base_addr instruction I with a copy from
897 // TLSBaseAddrReg, returning the new instruction.
898 MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
899 unsigned TLSBaseAddrReg) {
900 MachineFunction *MF = I->getParent()->getParent();
901 const AArch64TargetMachine *TM =
902 static_cast<const AArch64TargetMachine *>(&MF->getTarget());
903 const AArch64InstrInfo *TII = TM->getInstrInfo();
905 // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
906 // code sequence assumes the address will be.
907 MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
908 TII->get(TargetOpcode::COPY),
910 .addReg(TLSBaseAddrReg);
912 // Erase the TLS_base_addr instruction.
913 I->eraseFromParent();
918 // Create a virtual register in *TLSBaseAddrReg, and populate it by
919 // inserting a copy instruction after I. Returns the new instruction.
920 MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
921 MachineFunction *MF = I->getParent()->getParent();
922 const AArch64TargetMachine *TM =
923 static_cast<const AArch64TargetMachine *>(&MF->getTarget());
924 const AArch64InstrInfo *TII = TM->getInstrInfo();
926 // Create a virtual register for the TLS base address.
927 MachineRegisterInfo &RegInfo = MF->getRegInfo();
928 *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
930 // Insert a copy from X0 to TLSBaseAddrReg for later.
// Building before the next node places the copy directly after I.
931 MachineInstr *Next = I->getNextNode();
932 MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
933 TII->get(TargetOpcode::COPY),
935 .addReg(AArch64::X0);
// Human-readable pass name.
940 virtual const char *getPassName() const {
941 return "Local Dynamic TLS Access Clean-up";
// Declares that the pass preserves the CFG and requires the machine
// dominator tree.
944 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
945 AU.setPreservesCFG();
946 AU.addRequired<MachineDominatorTree>();
947 MachineFunctionPass::getAnalysisUsage(AU);
// Definition of the pass ID member that the LDTLSCleanup constructor passes
// to MachineFunctionPass.
952 char LDTLSCleanup::ID = 0;
/// Factory for the local-dynamic TLS clean-up pass: returns a newly allocated
/// LDTLSCleanup instance.
954 llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }