1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
12 //===----------------------------------------------------------------------===//
15 #include "AArch64InstrInfo.h"
16 #include "AArch64MachineFunctionInfo.h"
17 #include "AArch64TargetMachine.h"
18 #include "MCTargetDesc/AArch64MCTargetDesc.h"
19 #include "Utils/AArch64BaseInfo.h"
20 #include "llvm/CodeGen/MachineConstantPool.h"
21 #include "llvm/CodeGen/MachineDominators.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/Support/TargetRegistry.h"
33 #define GET_INSTRINFO_CTOR_DTOR
34 #include "AArch64GenInstrInfo.inc"
// Constructs the AArch64 instruction info for subtarget STI. The generated
// base class is handed the ADJCALLSTACKDOWN / ADJCALLSTACKUP opcodes
// (presumably the call-frame setup/destroy pseudos -- confirm against the
// AArch64GenInstrInfo constructor).
36 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
37 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
/// Emits, before I in MBB, the instruction(s) that copy physical register
/// SrcReg into DestReg. The opcode is selected from the registers involved:
///  - XSP / WSP on either side: ADD (immediate, lsl #0).
///  - NZCV as destination or source: MSR / MRS with a GPR64 on the other side.
///  - GPR64 / GPR32 destination: ORR for a same-class GPR source, otherwise
///    FMOV from the FPR of matching width.
///  - FPR32 / FPR64 destination: FMOV from an FPR or GPR of matching width.
///  - FPR128: vector ORR when NEON is available, otherwise a store/load pair
///    that goes through the stack via XSP.
///  - FPR8 / FPR16: copied through the enclosing FPR32 super-registers.
///  - CopyPhysRegTuple is invoked for a remaining case (presumably the
///    register-tuple fallback -- confirm).
40 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
41 MachineBasicBlock::iterator I, DebugLoc DL,
42 unsigned DestReg, unsigned SrcReg,
// Copies to or from the 64-bit stack pointer are emitted as ADD-immediate.
46 if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) {
47 // E.g. ADD xDst, xsp, #0 (, lsl #0)
48 BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg)
52 } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
53 // E.g. ADD wDst, wsp, #0 (, lsl #0)
54 BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg)
58 } else if (DestReg == AArch64::NZCV) {
// Writing the flags is only supported from a 64-bit general-purpose register.
59 assert(AArch64::GPR64RegClass.contains(SrcReg));
60 // E.g. MSR NZCV, xSrc
61 BuildMI(MBB, I, DL, get(AArch64::MSRix))
62 .addImm(A64SysReg::NZCV)
64 } else if (SrcReg == AArch64::NZCV) {
// Reading the flags is only supported into a 64-bit general-purpose register.
65 assert(AArch64::GPR64RegClass.contains(DestReg));
66 // E.g. MRS xDST, NZCV
67 BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
68 .addImm(A64SysReg::NZCV);
69 } else if (AArch64::GPR64RegClass.contains(DestReg)) {
// GPR64 <- GPR64 only records the opcode and zero register here; the ORR
// itself is built by the get(Opc) BuildMI near the end of the function.
70 if(AArch64::GPR64RegClass.contains(SrcReg)){
71 Opc = AArch64::ORRxxx_lsl;
72 ZeroReg = AArch64::XZR;
// GPR64 <- FPR64 is emitted directly as an FMOV.
74 assert(AArch64::FPR64RegClass.contains(SrcReg));
75 BuildMI(MBB, I, DL, get(AArch64::FMOVxd), DestReg)
79 } else if (AArch64::GPR32RegClass.contains(DestReg)) {
// Same scheme as the 64-bit case, using the 32-bit opcode and WZR.
80 if(AArch64::GPR32RegClass.contains(SrcReg)){
81 Opc = AArch64::ORRwww_lsl;
82 ZeroReg = AArch64::WZR;
// GPR32 <- FPR32 is emitted directly as an FMOV.
84 assert(AArch64::FPR32RegClass.contains(SrcReg));
85 BuildMI(MBB, I, DL, get(AArch64::FMOVws), DestReg)
89 } else if (AArch64::FPR32RegClass.contains(DestReg)) {
// FPR32 destination: FMOV from another FPR32, or from a GPR32.
90 if(AArch64::FPR32RegClass.contains(SrcReg)){
91 BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
96 assert(AArch64::GPR32RegClass.contains(SrcReg));
97 BuildMI(MBB, I, DL, get(AArch64::FMOVsw), DestReg)
101 } else if (AArch64::FPR64RegClass.contains(DestReg)) {
// FPR64 destination: FMOV from another FPR64, or from a GPR64.
102 if(AArch64::FPR64RegClass.contains(SrcReg)){
103 BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
108 assert(AArch64::GPR64RegClass.contains(SrcReg));
109 BuildMI(MBB, I, DL, get(AArch64::FMOVdx), DestReg)
113 } else if (AArch64::FPR128RegClass.contains(DestReg)) {
114 assert(AArch64::FPR128RegClass.contains(SrcReg));
116 // If NEON is enabled, we use ORR to implement this copy.
117 // If NEON isn't available, emit STR and LDR to handle this.
118 if(getSubTarget().hasNEON()) {
119 BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg)
// Without NEON: a pre-indexed store through XSP followed by a post-indexed
// load into DestReg. 0x1ff & -16 == 0x1f0, i.e. -16 truncated to 9 bits
// (presumably the signed 9-bit pre-index offset encoding -- confirm).
124 BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
126 .addReg(AArch64::XSP)
127 .addImm(0x1ff & -16);
129 BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
130 .addReg(AArch64::XSP, RegState::Define)
131 .addReg(AArch64::XSP)
135 } else if (AArch64::FPR8RegClass.contains(DestReg, SrcReg)) {
136 // The copy of two FPR8 registers is implemented by the copy of two FPR32
// Look up the FPR32 super-register whose sub_8 sub-register is each operand.
137 const TargetRegisterInfo *TRI = &getRegisterInfo();
138 unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_8,
139 &AArch64::FPR32RegClass);
140 unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_8,
141 &AArch64::FPR32RegClass);
142 BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst)
145 } else if (AArch64::FPR16RegClass.contains(DestReg, SrcReg)) {
146 // The copy of two FPR16 registers is implemented by the copy of two FPR32
// Same approach as FPR8, using the sub_16 sub-register index.
147 const TargetRegisterInfo *TRI = &getRegisterInfo();
148 unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_16,
149 &AArch64::FPR32RegClass);
150 unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_16,
151 &AArch64::FPR32RegClass);
152 BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst)
// Delegate to the tuple-copy helper.
156 CopyPhysRegTuple(MBB, I, DL, DestReg, SrcReg);
// Shared tail for the GPR <- GPR cases above, using the Opc recorded there.
160 // E.g. ORR xDst, xzr, xSrc, lsl #0
161 BuildMI(MBB, I, DL, get(Opc), DestReg)
/// Copies the register tuple SrcReg into DestReg one sub-register at a time.
/// Both registers must belong to the same tuple class (D or Q pair, triple or
/// quad); any other class hits llvm_unreachable. Each element is copied with
/// a vector ORR (16B for Q registers, 8B for D registers), inserted before I.
167 void AArch64InstrInfo::CopyPhysRegTuple(MachineBasicBlock &MBB,
168 MachineBasicBlock::iterator I,
169 DebugLoc DL, unsigned DestReg,
170 unsigned SrcReg) const {
// Classify the tuple. IsQRegs and SubRegs, which are used below, are
// presumably set inside these branches -- confirm.
173 if (AArch64::DPairRegClass.contains(DestReg, SrcReg)) {
176 } else if (AArch64::DTripleRegClass.contains(DestReg, SrcReg)) {
179 } else if (AArch64::DQuadRegClass.contains(DestReg, SrcReg)) {
182 } else if (AArch64::QPairRegClass.contains(DestReg, SrcReg)) {
185 } else if (AArch64::QTripleRegClass.contains(DestReg, SrcReg)) {
188 } else if (AArch64::QQuadRegClass.contains(DestReg, SrcReg)) {
192 llvm_unreachable("Unknown register class");
// First sub-register index of the tuple: qsub_0 for Q tuples, dsub_0 for D.
194 unsigned BeginIdx = IsQRegs ? AArch64::qsub_0 : AArch64::dsub_0;
196 const TargetRegisterInfo *TRI = &getRegisterInfo();
197 // Copy register tuples backward when the first Dest reg overlaps
199 if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
// Start from the last sub-register instead of the first. Spacing, used in
// the loop below, is presumably negated alongside this -- confirm.
200 BeginIdx = BeginIdx + (SubRegs - 1);
204 unsigned Opc = IsQRegs ? AArch64::ORRvvv_16B : AArch64::ORRvvv_8B;
205 for (unsigned i = 0; i != SubRegs; ++i) {
206 unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
207 unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
208 assert(Dst && Src && "Bad sub-register");
// NOTE(review): this uses I->getDebugLoc() rather than the DL parameter. If
// I can be MBB.end() here, that dereferences the end iterator -- confirm and
// consider passing DL instead.
209 BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
216 /// Does the Opcode represent a conditional branch that we can remove and re-add
217 /// at the end of a basic block?
/// Returns true for Bcc, the compare-and-branch forms (CBZ/CBNZ, 32- and
/// 64-bit) and the test-bit-and-branch forms (TBZ/TBNZ, 32- and 64-bit).
218 static bool isCondBranch(unsigned Opc) {
219 return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx ||
220 Opc == AArch64::CBNZw || Opc == AArch64::CBNZx ||
221 Opc == AArch64::TBZwii || Opc == AArch64::TBZxii ||
222 Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii;
225 /// Takes apart a given conditional branch MachineInstr (see isCondBranch),
226 /// setting TBB to the destination basic block and populating the Cond vector
227 /// with data necessary to recreate the conditional branch at a later
228 /// date. First element will be the opcode, and subsequent ones define the
229 /// conditions being branched on in an instruction-specific manner.
///
/// Resulting layouts of Cond:
///   one-predicate branches: { opcode, operand 0 }, target = operand 1
///   TBZ/TBNZ:               { opcode, operand 0, operand 1 }, target = operand 2
/// Any opcode not handled by the switch hits llvm_unreachable.
230 static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB,
231 SmallVectorImpl<MachineOperand> &Cond) {
232 switch(I->getOpcode()) {
238 // These instructions just have one predicate operand in position 0 (either
239 // a condition code or a register being compared).
// The opcode is stored as an immediate so the branch can be rebuilt later.
240 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
241 Cond.push_back(I->getOperand(0));
242 TBB = I->getOperand(1).getMBB();
244 case AArch64::TBZwii:
245 case AArch64::TBZxii:
246 case AArch64::TBNZwii:
247 case AArch64::TBNZxii:
248 // These have two predicate operands: a register and a bit position.
249 Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
250 Cond.push_back(I->getOperand(0));
251 Cond.push_back(I->getOperand(1));
252 TBB = I->getOperand(2).getMBB();
255 llvm_unreachable("Unknown conditional branch to classify");
// Inspects the terminators at the end of MBB and, when they form a pattern
// this code understands, reports it through TBB (taken target), FBB (target
// of a trailing unconditional branch after a conditional one) and Cond (in
// the layout produced by classifyCondBranch). Returning true means the block
// could not be analyzed (see the indirect-branch case below). When
// AllowModify is set, redundant trailing unconditional branches are erased.
261 AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
262 MachineBasicBlock *&FBB,
263 SmallVectorImpl<MachineOperand> &Cond,
264 bool AllowModify) const {
265 // If the block has no terminators, it just falls into the block after it.
266 MachineBasicBlock::iterator I = MBB.end();
267 if (I == MBB.begin())
// Skip over debug-value instructions; they are not real terminators.
270 while (I->isDebugValue()) {
271 if (I == MBB.begin())
275 if (!isUnpredicatedTerminator(I))
278 // Get the last instruction in the block.
279 MachineInstr *LastInst = I;
281 // If there is only one terminator instruction, process it.
282 unsigned LastOpc = LastInst->getOpcode();
// Note: the --I inside this condition steps I back to the preceding
// instruction as a side effect whenever I is not already at MBB.begin().
283 if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
284 if (LastOpc == AArch64::Bimm) {
285 TBB = LastInst->getOperand(0).getMBB();
288 if (isCondBranch(LastOpc)) {
289 classifyCondBranch(LastInst, TBB, Cond);
292 return true; // Can't handle indirect branch.
295 // Get the instruction before it if it is a terminator.
296 MachineInstr *SecondLastInst = I;
297 unsigned SecondLastOpc = SecondLastInst->getOpcode();
299 // If AllowModify is true and the block ends with two or more unconditional
300 // branches, delete all but the first unconditional branch.
301 if (AllowModify && LastOpc == AArch64::Bimm) {
302 while (SecondLastOpc == AArch64::Bimm) {
// Drop the later branch and slide the (LastInst, SecondLastInst) window up.
303 LastInst->eraseFromParent();
304 LastInst = SecondLastInst;
305 LastOpc = LastInst->getOpcode();
306 if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
307 // Return now the only terminator is an unconditional branch.
308 TBB = LastInst->getOperand(0).getMBB();
312 SecondLastOpc = SecondLastInst->getOpcode();
317 // If there are three terminators, we don't know what sort of block this is.
318 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
321 // If the block ends with a B and a Bcc, handle it.
322 if (LastOpc == AArch64::Bimm) {
323 if (SecondLastOpc == AArch64::Bcc) {
// Same Cond layout as classifyCondBranch builds for Bcc:
// { opcode, condition code }, with the target taken from operand 1.
324 TBB = SecondLastInst->getOperand(1).getMBB();
325 Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
326 Cond.push_back(SecondLastInst->getOperand(0));
327 FBB = LastInst->getOperand(0).getMBB();
329 } else if (isCondBranch(SecondLastOpc)) {
330 classifyCondBranch(SecondLastInst, TBB, Cond);
331 FBB = LastInst->getOperand(0).getMBB();
336 // If the block ends with two unconditional branches, handle it. The second
337 // one is not executed, so remove it.
338 if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
339 TBB = SecondLastInst->getOperand(0).getMBB();
342 I->eraseFromParent();
346 // Otherwise, can't handle this.
// Inverts, in place, a branch condition in the layout produced by
// classifyCondBranch (Cond[0] holds the branch opcode). For a condition-code
// branch the code in Cond[1] is inverted with A64InvertCondCode; for the
// compare/test-and-branch forms the opcode in Cond[0] is swapped with its
// opposite (CBZ <-> CBNZ, TBZ <-> TBNZ) and the operands are left untouched.
// An opcode not handled by the switch hits llvm_unreachable.
350 bool AArch64InstrInfo::ReverseBranchCondition(
351 SmallVectorImpl<MachineOperand> &Cond) const {
352 switch (Cond[0].getImm()) {
354 A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm());
355 CC = A64InvertCondCode(CC);
// The four setImm calls below flip between the CBZ and CBNZ opcodes
// (presumably one per case label CBZw/CBZx/CBNZw/CBNZx -- confirm).
360 Cond[0].setImm(AArch64::CBNZw);
363 Cond[0].setImm(AArch64::CBNZx);
366 Cond[0].setImm(AArch64::CBZw);
369 Cond[0].setImm(AArch64::CBZx);
371 case AArch64::TBZwii:
372 Cond[0].setImm(AArch64::TBNZwii);
374 case AArch64::TBZxii:
375 Cond[0].setImm(AArch64::TBNZxii);
377 case AArch64::TBNZwii:
378 Cond[0].setImm(AArch64::TBZwii);
380 case AArch64::TBNZxii:
381 Cond[0].setImm(AArch64::TBZxii);
384 llvm_unreachable("Unknown branch type");
// Appends branch instructions to the end of MBB:
//  - no FBB and empty Cond: a single unconditional Bimm to TBB;
//  - otherwise: a conditional branch rebuilt from Cond (Cond[0] is the
//    opcode, the remaining entries are its leading operands), and, in the
//    two-way case, a trailing unconditional Bimm to FBB.
390 AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
391 MachineBasicBlock *FBB,
392 const SmallVectorImpl<MachineOperand> &Cond,
394 if (!FBB && Cond.empty()) {
395 BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB);
// Rebuild the conditional branch; Cond[1..] are copied as operands in order.
398 MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
399 for (int i = 1, e = Cond.size(); i != e; ++i)
400 MIB.addOperand(Cond[i]);
// Two-way case: the same conditional branch, followed by a branch to FBB.
405 MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
406 for (int i = 1, e = Cond.size(); i != e; ++i)
407 MIB.addOperand(Cond[i]);
410 BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB);
// Erases up to two branch instructions from the end of MBB: the last
// instruction if it is a Bimm or a conditional branch (per isCondBranch), and
// then the one before it if that is a conditional branch. Returns 0 for an
// empty block and 1 when nothing precedes the first erased branch; the
// result is presumably the number of branches removed -- confirm.
414 unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
415 MachineBasicBlock::iterator I = MBB.end();
416 if (I == MBB.begin()) return 0;
// Skip over debug-value instructions.
418 while (I->isDebugValue()) {
419 if (I == MBB.begin())
423 if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode()))
426 // Remove the branch.
427 I->eraseFromParent();
431 if (I == MBB.begin()) return 1;
// Only a conditional branch may precede the branch that was just removed.
433 if (!isCondBranch(I->getOpcode()))
436 // Remove the branch.
437 I->eraseFromParent();
// Expands pseudo-instructions after register allocation. The visible case,
// TLSDESC_BLRx, inserts a TLSDESCCALL carrying the pseudo's operand 1 before
// it, rewrites the pseudo itself into a BLRx, and bundles the two together.
442 AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const {
443 MachineInstr &MI = *MBBI;
444 MachineBasicBlock &MBB = *MI.getParent();
446 unsigned Opcode = MI.getOpcode();
448 case AArch64::TLSDESC_BLRx: {
449 MachineInstr *NewMI =
450 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL))
451 .addOperand(MI.getOperand(1));
// The pseudo is mutated in place rather than replaced by a new instruction.
452 MI.setDesc(get(AArch64::BLRx));
// Bundle from NewMI up to (not including) the instruction after MI; note
// that ++MBBI advances the caller-supplied iterator copy.
454 llvm::finalizeBundle(MBB, NewMI, *++MBBI);
// Emits, before MBBI, a store of SrcReg to stack slot FrameIdx. The store
// opcode is chosen from the register class RC:
//  - classes holding i64/i32: LS32_STR or LS64_STR by register size;
//  - FPR8 / FPR16: LSFP8_STR / LSFP16_STR;
//  - classes holding f32/f64/f128: LSFP32/64/128_STR by register size;
//  - D/Q register tuples: the matching ST1xN_8B / ST1xN_16B.
// A memory operand describing the fixed stack slot is built for the store.
465 AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
466 MachineBasicBlock::iterator MBBI,
467 unsigned SrcReg, bool isKill,
469 const TargetRegisterClass *RC,
470 const TargetRegisterInfo *TRI) const {
471 DebugLoc DL = MBB.findDebugLoc(MBBI);
472 MachineFunction &MF = *MBB.getParent();
473 MachineFrameInfo &MFI = *MF.getFrameInfo();
474 unsigned Align = MFI.getObjectAlignment(FrameIdx);
// Describe the access as a store covering the slot's size.
476 MachineMemOperand *MMO
477 = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
478 MachineMemOperand::MOStore,
479 MFI.getObjectSize(FrameIdx),
482 unsigned StoreOp = 0;
483 if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
// Integer classes: pick the opcode by register size in bytes.
484 switch(RC->getSize()) {
485 case 4: StoreOp = AArch64::LS32_STR; break;
486 case 8: StoreOp = AArch64::LS64_STR; break;
488 llvm_unreachable("Unknown size for regclass");
490 } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) {
491 StoreOp = AArch64::LSFP8_STR;
492 } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) {
493 StoreOp = AArch64::LSFP16_STR;
494 } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
495 RC->hasType(MVT::f128)) {
// Floating-point classes: pick the opcode by register size in bytes.
496 switch (RC->getSize()) {
497 case 4: StoreOp = AArch64::LSFP32_STR; break;
498 case 8: StoreOp = AArch64::LSFP64_STR; break;
499 case 16: StoreOp = AArch64::LSFP128_STR; break;
501 llvm_unreachable("Unknown size for regclass");
503 } else { // For a super register class that has more than one sub-register
504 if (AArch64::DPairRegClass.hasSubClassEq(RC))
505 StoreOp = AArch64::ST1x2_8B;
506 else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
507 StoreOp = AArch64::ST1x3_8B;
508 else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
509 StoreOp = AArch64::ST1x4_8B;
510 else if (AArch64::QPairRegClass.hasSubClassEq(RC))
511 StoreOp = AArch64::ST1x2_16B;
512 else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
513 StoreOp = AArch64::ST1x3_16B;
514 else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
515 StoreOp = AArch64::ST1x4_16B;
517 llvm_unreachable("Unknown reg class");
519 MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
520 // Vector store has different operands from other store instructions.
// Here the frame index comes first, then the stored register.
521 NewMI.addFrameIndex(FrameIdx)
522 .addReg(SrcReg, getKillRegState(isKill))
// Non-vector stores: the stored register comes first, then the frame index.
527 MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
528 NewMI.addReg(SrcReg, getKillRegState(isKill))
529 .addFrameIndex(FrameIdx)
// Emits, before MBBI, a load of DestReg from stack slot FrameIdx. The load
// opcode is chosen from the register class RC, mirroring
// storeRegToStackSlot:
//  - classes holding i64/i32: LS32_LDR or LS64_LDR by register size;
//  - FPR8 / FPR16: LSFP8_LDR / LSFP16_LDR;
//  - classes holding f32/f64/f128: LSFP32/64/128_LDR by register size;
//  - D/Q register tuples: the matching LD1xN_8B / LD1xN_16B.
// A memory operand describing the fixed stack slot is built for the load.
536 AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
537 MachineBasicBlock::iterator MBBI,
538 unsigned DestReg, int FrameIdx,
539 const TargetRegisterClass *RC,
540 const TargetRegisterInfo *TRI) const {
541 DebugLoc DL = MBB.findDebugLoc(MBBI);
542 MachineFunction &MF = *MBB.getParent();
543 MachineFrameInfo &MFI = *MF.getFrameInfo();
544 unsigned Align = MFI.getObjectAlignment(FrameIdx);
// Describe the access as a load covering the slot's size.
546 MachineMemOperand *MMO
547 = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
548 MachineMemOperand::MOLoad,
549 MFI.getObjectSize(FrameIdx),
553 if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
// Integer classes: pick the opcode by register size in bytes.
554 switch(RC->getSize()) {
555 case 4: LoadOp = AArch64::LS32_LDR; break;
556 case 8: LoadOp = AArch64::LS64_LDR; break;
558 llvm_unreachable("Unknown size for regclass");
560 } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) {
561 LoadOp = AArch64::LSFP8_LDR;
562 } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) {
563 LoadOp = AArch64::LSFP16_LDR;
564 } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
565 RC->hasType(MVT::f128)) {
// Floating-point classes: pick the opcode by register size in bytes.
566 switch (RC->getSize()) {
567 case 4: LoadOp = AArch64::LSFP32_LDR; break;
568 case 8: LoadOp = AArch64::LSFP64_LDR; break;
569 case 16: LoadOp = AArch64::LSFP128_LDR; break;
571 llvm_unreachable("Unknown size for regclass");
573 } else { // For a super register class that has more than one sub-register
574 if (AArch64::DPairRegClass.hasSubClassEq(RC))
575 LoadOp = AArch64::LD1x2_8B;
576 else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
577 LoadOp = AArch64::LD1x3_8B;
578 else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
579 LoadOp = AArch64::LD1x4_8B;
580 else if (AArch64::QPairRegClass.hasSubClassEq(RC))
581 LoadOp = AArch64::LD1x2_16B;
582 else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
583 LoadOp = AArch64::LD1x3_16B;
584 else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
585 LoadOp = AArch64::LD1x4_16B;
587 llvm_unreachable("Unknown reg class");
589 MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
590 // Vector load has different operands from other load instructions.
591 NewMI.addFrameIndex(FrameIdx)
// Non-vector loads also start with the frame index; the operands that
// distinguish the two forms presumably follow it -- confirm.
596 MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
597 NewMI.addFrameIndex(FrameIdx)
// Walks every instruction in MF that has a frame-index operand and computes
// the smallest maximum offset any of them can encode, starting from an upper
// bound of 0xffff. (Presumably used to decide whether a register-scavenging
// slot is needed -- confirm against the caller.)
602 unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const {
603 unsigned Limit = (1 << 16) - 1;
604 for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
// Note: this E shadows the outer loop's E for the rest of the inner loop.
605 for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
607 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
// Only frame-index operands constrain the limit.
608 if (!I->getOperand(i).isFI()) continue;
610 // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff
611 // is the largest offset guaranteed to fit in the immediate offset.
612 if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) {
613 Limit = std::min(Limit, 0xfffu);
// For other instructions, use the largest offset reported by
// getAddressConstraints for this opcode.
617 int AccessScale, MinOffset, MaxOffset;
618 getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset);
619 Limit = std::min(Limit, static_cast<unsigned>(MaxOffset));
621 break; // At most one FI per instruction
// Reports, through the output parameters, the addressing limits of the
// load/store instruction MI: the access scale and the minimum and maximum
// offsets it can encode. Opcodes are grouped by access width. In the visible
// code the single-register and LD1/ST1 forms get MaxOffset = 0xfff *
// AccessScale, while the pair forms get the range
// [-0x40 * AccessScale, 0x3f * AccessScale]. An opcode not covered by the
// switch hits llvm_unreachable.
628 void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
629 int &AccessScale, int &MinOffset,
630 int &MaxOffset) const {
631 switch (MI.getOpcode()) {
633 llvm_unreachable("Unknown load/store kind");
634 case TargetOpcode::DBG_VALUE:
// Byte-sized accesses.
639 case AArch64::LS8_LDR: case AArch64::LS8_STR:
640 case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
641 case AArch64::LDRSBw:
642 case AArch64::LDRSBx:
// Halfword accesses.
647 case AArch64::LS16_LDR: case AArch64::LS16_STR:
648 case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
649 case AArch64::LDRSHw:
650 case AArch64::LDRSHx:
653 MaxOffset = 0xfff * AccessScale;
// Word accesses.
655 case AArch64::LS32_LDR: case AArch64::LS32_STR:
656 case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
657 case AArch64::LDRSWx:
658 case AArch64::LDPSWx:
661 MaxOffset = 0xfff * AccessScale;
// Doubleword accesses.
663 case AArch64::LS64_LDR: case AArch64::LS64_STR:
664 case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
668 MaxOffset = 0xfff * AccessScale;
// 128-bit floating-point accesses.
670 case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
673 MaxOffset = 0xfff * AccessScale;
// Pair loads/stores use a signed range of -0x40..0x3f scaled units.
675 case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
676 case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
678 MinOffset = -0x40 * AccessScale;
679 MaxOffset = 0x3f * AccessScale;
681 case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
682 case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
684 MinOffset = -0x40 * AccessScale;
685 MaxOffset = 0x3f * AccessScale;
687 case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
689 MinOffset = -0x40 * AccessScale;
690 MaxOffset = 0x3f * AccessScale;
// Vector tuple loads/stores (LD1/ST1 with 2, 3 or 4 registers).
692 case AArch64::LD1x2_8B: case AArch64::ST1x2_8B:
695 MaxOffset = 0xfff * AccessScale;
697 case AArch64::LD1x3_8B: case AArch64::ST1x3_8B:
700 MaxOffset = 0xfff * AccessScale;
// Four D registers and two Q registers share one case group.
702 case AArch64::LD1x4_8B: case AArch64::ST1x4_8B:
703 case AArch64::LD1x2_16B: case AArch64::ST1x2_16B:
706 MaxOffset = 0xfff * AccessScale;
708 case AArch64::LD1x3_16B: case AArch64::ST1x3_16B:
711 MaxOffset = 0xfff * AccessScale;
713 case AArch64::LD1x4_16B: case AArch64::ST1x4_16B:
716 MaxOffset = 0xfff * AccessScale;
// Returns the size in bytes of MI. Three sources are visible: the size
// recorded in the instruction descriptor, the estimated length of an
// inline-asm string, and the summed length of a bundle. The listed
// meta-instructions (IMPLICIT_DEF, KILL, labels, DBG_VALUE, ...) and
// TLSDESCCALL share one case (presumably reporting zero bytes -- confirm).
// Any other opcode reaching the switch hits llvm_unreachable.
721 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
722 const MCInstrDesc &MCID = MI.getDesc();
723 const MachineBasicBlock &MBB = *MI.getParent();
724 const MachineFunction &MF = *MBB.getParent();
725 const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();
// Descriptor-provided size (presumably taken only when it is non-zero --
// confirm).
728 return MCID.getSize();
// Inline asm: operand 0 is the asm string; its length is estimated.
730 if (MI.getOpcode() == AArch64::INLINEASM)
731 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
733 switch (MI.getOpcode()) {
734 case TargetOpcode::BUNDLE:
735 return getInstBundleLength(MI);
736 case TargetOpcode::IMPLICIT_DEF:
737 case TargetOpcode::KILL:
738 case TargetOpcode::CFI_INSTRUCTION:
739 case TargetOpcode::EH_LABEL:
740 case TargetOpcode::GC_LABEL:
741 case TargetOpcode::DBG_VALUE:
742 case AArch64::TLSDESCCALL:
745 llvm_unreachable("Unknown instruction class");
// Sums getInstSizeInBytes over the instructions that follow the bundle
// header MI and are inside its bundle. Nested bundles are not expected and
// trip the assertion.
749 unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
751 MachineBasicBlock::const_instr_iterator I = MI;
752 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
// Start from the instruction after the header and stop at the first
// instruction that is not inside a bundle (or at the end of the block).
753 while (++I != E && I->isInsideBundle()) {
754 assert(!I->isBundle() && "No nested bundle!");
755 Size += getInstSizeInBytes(*I);
// Placeholder for rewriting the frame-index operand of MI in terms of
// FrameReg and Offset. The visible body looks up the frame info and then
// unconditionally hits llvm_unreachable, so it must not be called as written.
760 bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
761 unsigned FrameReg, int &Offset,
762 const AArch64InstrInfo &TII) {
763 MachineBasicBlock &MBB = *MI.getParent();
764 MachineFunction &MF = *MBB.getParent();
765 MachineFrameInfo &MFI = *MF.getFrameInfo();
// NOTE(review): the result of this lookup is not used on this line.
767 MFI.getObjectOffset(FrameRegIdx);
768 llvm_unreachable("Unimplemented rewriteFrameIndex");
// Emits, before MBBI, instructions computing DstReg = SrcReg + NumBytes.
//  - NumBytes == 0 with DstReg == SrcReg is special-cased first.
//  - If |NumBytes| has any bit set above bit 23, the magnitude is
//    materialized into ScratchReg with MOVZ/MOVK and then added to or
//    subtracted from SrcReg with a register-register ADD/SUB.
//  - Otherwise at most two ADD/SUB-immediate instructions are used: one for
//    the low 12 bits (lsl #0) and one for bits 12..23 (lsl #12).
// MIFlags is attached to the emitted instructions.
771 void llvm::emitRegUpdate(MachineBasicBlock &MBB,
772 MachineBasicBlock::iterator MBBI,
773 DebugLoc dl, const TargetInstrInfo &TII,
774 unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
775 int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
776 if (NumBytes == 0 && DstReg == SrcReg)
// True when the magnitude does not fit in 24 bits.
778 else if (abs64(NumBytes) & ~0xffffff) {
779 // Generically, we have to materialize the offset into a temporary register
780 // and subtract it. There are a couple of ways this could be done, for now
781 // we'll use a movz/movk or movn/movk sequence.
782 uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes));
// Lowest 16 bits go in with MOVZ; the second immediate (0..3) selects the
// 16-bit chunk being written.
783 BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg)
784 .addImm(0xffff & Bits).addImm(0)
785 .setMIFlags(MIFlags);
// Each MOVK below inserts another 16-bit chunk (presumably after Bits has
// been shifted right by 16 and only while bits remain -- confirm).
789 BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
791 .addImm(0xffff & Bits).addImm(1)
792 .setMIFlags(MIFlags);
797 BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
799 .addImm(0xffff & Bits).addImm(2)
800 .setMIFlags(MIFlags);
805 BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
807 .addImm(0xffff & Bits).addImm(3)
808 .setMIFlags(MIFlags);
811 // ADD DST, SRC, xTMP (, lsl #0)
// The scratch register holds the magnitude, so the sign picks ADD or SUB.
812 unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx;
813 BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg)
814 .addReg(SrcReg, RegState::Kill)
815 .addReg(ScratchReg, RegState::Kill)
821 // Now we know that the adjustment can be done in at most two add/sub
822 // (immediate) instructions, which is always more efficient than a
823 // literal-pool load, or even a hypothetical movz/movk/add sequence
825 // Decide whether we're doing addition or subtraction
826 unsigned LowOp, HighOp;
828 LowOp = AArch64::ADDxxi_lsl0_s;
829 HighOp = AArch64::ADDxxi_lsl12_s;
// Subtraction: use the SUB forms and continue with the magnitude.
831 LowOp = AArch64::SUBxxi_lsl0_s;
832 HighOp = AArch64::SUBxxi_lsl12_s;
833 NumBytes = abs64(NumBytes);
836 // If we're here, at the very least a move needs to be produced, which just
837 // happens to be materializable by an ADD.
838 if ((NumBytes & 0xfff) || NumBytes == 0) {
839 BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg)
840 .addReg(SrcReg, RegState::Kill)
841 .addImm(NumBytes & 0xfff)
844 // Next update should use the register we've just defined.
// Bits 12..23 are applied with the lsl #12 form of the same operation.
848 if (NumBytes & 0xfff000) {
849 BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg)
850 .addReg(SrcReg, RegState::Kill)
851 .addImm(NumBytes >> 12)
// Adjusts the stack pointer by NumBytes: a convenience wrapper around
// emitRegUpdate with XSP as both destination and source.
// NOTE(review): the ScratchReg parameter is not forwarded in the visible
// call -- AArch64::X16 is passed in the scratch-register position instead.
// Confirm whether callers rely on their own ScratchReg being used.
856 void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
857 DebugLoc dl, const TargetInstrInfo &TII,
858 unsigned ScratchReg, int64_t NumBytes,
859 MachineInstr::MIFlag MIFlags) {
860 emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16,
// Machine-function pass that cleans up local-dynamic TLS accesses. It walks
// the dominator tree; the first TLSDESC_BLRx referring to _TLS_MODULE_BASE_
// has its result copied from X0 into a fresh virtual register, and later
// such calls in the dominated subtree are replaced by a copy from that
// register.
866 struct LDTLSCleanup : public MachineFunctionPass {
868 LDTLSCleanup() : MachineFunctionPass(ID) {}
// Pass entry point. Does nothing unless the function has at least two
// local-dynamic TLS accesses; otherwise starts the walk at the dominator
// tree root with no base-address register yet (0).
870 bool runOnMachineFunction(MachineFunction &MF) override {
871 AArch64MachineFunctionInfo* MFI
872 = MF.getInfo<AArch64MachineFunctionInfo>();
873 if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
874 // No point folding accesses if there aren't at least two.
878 MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
879 return VisitNode(DT->getRootNode(), 0);
882 // Visit the dominator subtree rooted at Node in pre-order.
883 // If TLSBaseAddrReg is non-null, then use that to replace any
884 // TLS_base_addr instructions. Otherwise, create the register
885 // when the first such instruction is seen, and then use it
886 // as we encounter more instructions.
// TLSBaseAddrReg is passed by value, so a register created in this block is
// visible to the children visited from here but not to sibling subtrees.
887 bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
888 MachineBasicBlock *BB = Node->getBlock();
889 bool Changed = false;
891 // Traverse the current block.
892 for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
894 switch (I->getOpcode()) {
895 case AArch64::TLSDESC_BLRx:
896 // Make sure it's a local dynamic access.
// strcmp returns non-zero when the symbol is NOT _TLS_MODULE_BASE_.
897 if (!I->getOperand(1).isSymbol() ||
898 strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
// Either reuse the known base register or capture it at the first access.
902 I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
904 I = SetRegister(I, &TLSBaseAddrReg);
912 // Visit the children of this block in the dominator tree.
913 for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
915 Changed |= VisitNode(*I, TLSBaseAddrReg);
921 // Replace the TLS_base_addr instruction I with a copy from
922 // TLSBaseAddrReg, returning the new instruction.
923 MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
924 unsigned TLSBaseAddrReg) {
925 MachineFunction *MF = I->getParent()->getParent();
926 const AArch64TargetMachine *TM =
927 static_cast<const AArch64TargetMachine *>(&MF->getTarget());
928 const AArch64InstrInfo *TII = TM->getInstrInfo();
930 // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
931 // code sequence assumes the address will be.
932 MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
933 TII->get(TargetOpcode::COPY),
935 .addReg(TLSBaseAddrReg);
937 // Erase the TLS_base_addr instruction.
938 I->eraseFromParent();
943 // Create a virtual register in *TLSBaseAddrReg, and populate it by
944 // inserting a copy instruction after I. Returns the new instruction.
945 MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
946 MachineFunction *MF = I->getParent()->getParent();
947 const AArch64TargetMachine *TM =
948 static_cast<const AArch64TargetMachine *>(&MF->getTarget());
949 const AArch64InstrInfo *TII = TM->getInstrInfo();
951 // Create a virtual register for the TLS base address.
952 MachineRegisterInfo &RegInfo = MF->getRegInfo();
953 *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
955 // Insert a copy from X0 to TLSBaseAddrReg for later.
// The copy is inserted before the instruction following I, i.e. right
// after I itself.
956 MachineInstr *Next = I->getNextNode();
957 MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
958 TII->get(TargetOpcode::COPY),
960 .addReg(AArch64::X0);
// Human-readable pass name.
965 const char *getPassName() const override {
966 return "Local Dynamic TLS Access Clean-up";
// The pass keeps the CFG intact and needs the machine dominator tree.
969 void getAnalysisUsage(AnalysisUsage &AU) const override {
970 AU.setPreservesCFG();
971 AU.addRequired<MachineDominatorTree>();
972 MachineFunctionPass::getAnalysisUsage(AU);
// Definition of the static ID member that is passed to the
// MachineFunctionPass constructor.
977 char LDTLSCleanup::ID = 0;
// Factory for the local-dynamic TLS clean-up pass. The returned object is
// allocated with new; ownership passes to the caller (presumably the pass
// manager -- confirm).
979 llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }