//==-- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions --*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64InstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"
25 class AArch64ExpandPseudo : public MachineFunctionPass {
28 AArch64ExpandPseudo() : MachineFunctionPass(ID) {}
30 const AArch64InstrInfo *TII;
32 bool runOnMachineFunction(MachineFunction &Fn) override;
34 const char *getPassName() const override {
35 return "AArch64 pseudo instruction expansion pass";
39 bool expandMBB(MachineBasicBlock &MBB);
40 bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
41 bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
44 char AArch64ExpandPseudo::ID = 0;
47 /// \brief Transfer implicit operands on the pseudo instruction to the
48 /// instructions created from the expansion.
49 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
50 MachineInstrBuilder &DefMI) {
51 const MCInstrDesc &Desc = OldMI.getDesc();
52 for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
54 const MachineOperand &MO = OldMI.getOperand(i);
55 assert(MO.isReg() && MO.getReg());
/// \brief Helper function which extracts the specified 16-bit chunk from a
/// 64-bit value. Chunk 0 is the least significant 16 bits.
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");

  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
}
71 /// \brief Helper function which replicates a 16-bit chunk within a 64-bit
72 /// value. Indices correspond to element numbers in a v4i16.
73 static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) {
74 assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!");
75 const unsigned ShiftAmt = ToIdx * 16;
77 // Replicate the source chunk to the destination position.
78 const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt;
79 // Clear the destination chunk.
80 Imm &= ~(0xFFFFLL << ShiftAmt);
81 // Insert the replicated chunk.
85 /// \brief Helper function which tries to materialize a 64-bit value with an
86 /// ORR + MOVK instruction sequence.
87 static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
88 MachineBasicBlock &MBB,
89 MachineBasicBlock::iterator &MBBI,
90 const AArch64InstrInfo *TII, unsigned ChunkIdx) {
91 assert(ChunkIdx < 4 && "Out of range chunk index specified!");
92 const unsigned ShiftAmt = ChunkIdx * 16;
95 if (AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) {
96 // Create the ORR-immediate instruction.
97 MachineInstrBuilder MIB =
98 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
99 .addOperand(MI.getOperand(0))
100 .addReg(AArch64::XZR)
103 // Create the MOVK instruction.
104 const unsigned Imm16 = getChunk(UImm, ChunkIdx);
105 const unsigned DstReg = MI.getOperand(0).getReg();
106 const bool DstIsDead = MI.getOperand(0).isDead();
107 MachineInstrBuilder MIB1 =
108 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
109 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
112 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
114 transferImpOps(MI, MIB, MIB1);
115 MI.eraseFromParent();
122 /// \brief Check whether the given 16-bit chunk replicated to full 64-bit width
123 /// can be materialized with an ORR instruction.
124 static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
125 Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
127 return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
130 /// \brief Check for identical 16-bit chunks within the constant and if so
131 /// materialize them with a single ORR instruction. The remaining one or two
132 /// 16-bit chunks will be materialized with MOVK instructions.
134 /// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
135 /// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
136 /// an ORR instruction.
138 static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
139 MachineBasicBlock &MBB,
140 MachineBasicBlock::iterator &MBBI,
141 const AArch64InstrInfo *TII) {
142 typedef DenseMap<uint64_t, unsigned> CountMap;
145 // Scan the constant and count how often every chunk occurs.
146 for (unsigned Idx = 0; Idx < 4; ++Idx)
147 ++Counts[getChunk(UImm, Idx)];
149 // Traverse the chunks to find one which occurs more than once.
150 for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
151 Chunk != End; ++Chunk) {
152 const uint64_t ChunkVal = Chunk->first;
153 const unsigned Count = Chunk->second;
155 uint64_t Encoding = 0;
157 // We are looking for chunks which have two or three instances and can be
158 // materialized with an ORR instruction.
159 if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
162 const bool CountThree = Count == 3;
163 // Create the ORR-immediate instruction.
164 MachineInstrBuilder MIB =
165 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
166 .addOperand(MI.getOperand(0))
167 .addReg(AArch64::XZR)
170 const unsigned DstReg = MI.getOperand(0).getReg();
171 const bool DstIsDead = MI.getOperand(0).isDead();
173 unsigned ShiftAmt = 0;
175 // Find the first chunk not materialized with the ORR instruction.
176 for (; ShiftAmt < 64; ShiftAmt += 16) {
177 Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
179 if (Imm16 != ChunkVal)
183 // Create the first MOVK instruction.
184 MachineInstrBuilder MIB1 =
185 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
187 RegState::Define | getDeadRegState(DstIsDead && CountThree))
190 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
192 // In case we have three instances the whole constant is now materialized
195 transferImpOps(MI, MIB, MIB1);
196 MI.eraseFromParent();
200 // Find the remaining chunk which needs to be materialized.
201 for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
202 Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
204 if (Imm16 != ChunkVal)
208 // Create the second MOVK instruction.
209 MachineInstrBuilder MIB2 =
210 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
211 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
214 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
216 transferImpOps(MI, MIB, MIB2);
217 MI.eraseFromParent();
224 /// \brief Check whether this chunk matches the pattern '1...0...'. This pattern
225 /// starts a contiguous sequence of ones if we look at the bits from the LSB
227 static bool isStartChunk(uint64_t Chunk) {
228 if (Chunk == 0 || Chunk == UINT64_MAX)
231 return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64;
234 /// \brief Check whether this chunk matches the pattern '0...1...' This pattern
235 /// ends a contiguous sequence of ones if we look at the bits from the LSB
237 static bool isEndChunk(uint64_t Chunk) {
238 if (Chunk == 0 || Chunk == UINT64_MAX)
241 return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64;
/// \brief Clear or set all bits in the chunk at the given index.
/// \p Clear selects between clearing (true) and setting (false) the 16-bit
/// chunk at position \p Idx of \p Imm; returns the updated value.
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
  const uint64_t Mask = 0xFFFF;

  if (Clear)
    // Clear chunk in the immediate.
    Imm &= ~(Mask << (Idx * 16));
  else
    // Set all bits in the immediate for the particular chunk.
    Imm |= Mask << (Idx * 16);

  return Imm;
}
258 /// \brief Check whether the constant contains a sequence of contiguous ones,
259 /// which might be interrupted by one or two chunks. If so, materialize the
260 /// sequence of contiguous ones with an ORR instruction.
261 /// Materialize the chunks which are either interrupting the sequence or outside
262 /// of the sequence with a MOVK instruction.
264 /// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
265 /// which ends the sequence (0...1...). Then we are looking for constants which
266 /// contain at least one S and E chunk.
267 /// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
269 /// We are also looking for constants like |S|A|B|E| where the contiguous
270 /// sequence of ones wraps around the MSB into the LSB.
272 static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
273 MachineBasicBlock &MBB,
274 MachineBasicBlock::iterator &MBBI,
275 const AArch64InstrInfo *TII) {
276 const int NotSet = -1;
277 const uint64_t Mask = 0xFFFF;
279 int StartIdx = NotSet;
281 // Try to find the chunks which start/end a contiguous sequence of ones.
282 for (int Idx = 0; Idx < 4; ++Idx) {
283 int64_t Chunk = getChunk(UImm, Idx);
284 // Sign extend the 16-bit chunk to 64-bit.
285 Chunk = (Chunk << 48) >> 48;
287 if (isStartChunk(Chunk))
289 else if (isEndChunk(Chunk))
293 // Early exit in case we can't find a start/end chunk.
294 if (StartIdx == NotSet || EndIdx == NotSet)
297 // Outside of the contiguous sequence of ones everything needs to be zero.
298 uint64_t Outside = 0;
299 // Chunks between the start and end chunk need to have all their bits set.
300 uint64_t Inside = Mask;
302 // If our contiguous sequence of ones wraps around from the MSB into the LSB,
303 // just swap indices and pretend we are materializing a contiguous sequence
304 // of zeros surrounded by a contiguous sequence of ones.
305 if (StartIdx > EndIdx) {
306 std::swap(StartIdx, EndIdx);
307 std::swap(Outside, Inside);
310 uint64_t OrrImm = UImm;
311 int FirstMovkIdx = NotSet;
312 int SecondMovkIdx = NotSet;
314 // Find out which chunks we need to patch up to obtain a contiguous sequence
316 for (int Idx = 0; Idx < 4; ++Idx) {
317 const uint64_t Chunk = getChunk(UImm, Idx);
319 // Check whether we are looking at a chunk which is not part of the
320 // contiguous sequence of ones.
321 if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
322 OrrImm = updateImm(OrrImm, Idx, Outside == 0);
324 // Remember the index we need to patch.
325 if (FirstMovkIdx == NotSet)
330 // Check whether we are looking a chunk which is part of the contiguous
332 } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
333 OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
335 // Remember the index we need to patch.
336 if (FirstMovkIdx == NotSet)
342 assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
344 // Create the ORR-immediate instruction.
345 uint64_t Encoding = 0;
346 AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
347 MachineInstrBuilder MIB =
348 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
349 .addOperand(MI.getOperand(0))
350 .addReg(AArch64::XZR)
353 const unsigned DstReg = MI.getOperand(0).getReg();
354 const bool DstIsDead = MI.getOperand(0).isDead();
356 const bool SingleMovk = SecondMovkIdx == NotSet;
357 // Create the first MOVK instruction.
358 MachineInstrBuilder MIB1 =
359 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
361 RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
363 .addImm(getChunk(UImm, FirstMovkIdx))
365 AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16));
367 // Early exit in case we only need to emit a single MOVK instruction.
369 transferImpOps(MI, MIB, MIB1);
370 MI.eraseFromParent();
374 // Create the second MOVK instruction.
375 MachineInstrBuilder MIB2 =
376 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
377 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
379 .addImm(getChunk(UImm, SecondMovkIdx))
381 AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16));
383 transferImpOps(MI, MIB, MIB2);
384 MI.eraseFromParent();
388 /// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
389 /// real move-immediate instructions to synthesize the immediate.
390 bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
391 MachineBasicBlock::iterator MBBI,
393 MachineInstr &MI = *MBBI;
394 uint64_t Imm = MI.getOperand(1).getImm();
395 const unsigned Mask = 0xFFFF;
397 // Try a MOVI instruction (aka ORR-immediate with the zero register).
398 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
400 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
401 unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
402 MachineInstrBuilder MIB =
403 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
404 .addOperand(MI.getOperand(0))
405 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
407 transferImpOps(MI, MIB, MIB);
408 MI.eraseFromParent();
412 // Scan the immediate and count the number of 16-bit chunks which are either
413 // all ones or all zeros.
414 unsigned OneChunks = 0;
415 unsigned ZeroChunks = 0;
416 for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
417 const unsigned Chunk = (Imm >> Shift) & Mask;
424 // Since we can't materialize the constant with a single ORR instruction,
425 // let's see whether we can materialize 3/4 of the constant with an ORR
426 // instruction and use an additional MOVK instruction to materialize the
429 // We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|.
431 // E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR,
432 // we would create the following instruction sequence:
434 // ORR x0, xzr, |A|X|A|X|
435 // MOVK x0, |B|, LSL #16
437 // Only look at 64-bit constants which can't be materialized with a single
438 // instruction e.g. which have less than either three all zero or all one
441 // Ignore 32-bit constants here, they always can be materialized with a
442 // MOVZ/MOVN + MOVK pair. Since the 32-bit constant can't be materialized
443 // with a single ORR, the best sequence we can achieve is a ORR + MOVK pair.
444 // Thus we fall back to the default code below which in the best case creates
445 // a single MOVZ/MOVN instruction (in case one chunk is all zero or all one).
447 if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
448 // If we interpret the 64-bit constant as a v4i16, are elements 0 and 2
450 if (getChunk(UImm, 0) == getChunk(UImm, 2)) {
451 // See if we can come up with a constant which can be materialized with
452 // ORR-immediate by replicating element 3 into element 1.
453 uint64_t OrrImm = replicateChunk(UImm, 3, 1);
454 if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1))
457 // See if we can come up with a constant which can be materialized with
458 // ORR-immediate by replicating element 1 into element 3.
459 OrrImm = replicateChunk(UImm, 1, 3);
460 if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3))
463 // If we interpret the 64-bit constant as a v4i16, are elements 1 and 3
465 } else if (getChunk(UImm, 1) == getChunk(UImm, 3)) {
466 // See if we can come up with a constant which can be materialized with
467 // ORR-immediate by replicating element 2 into element 0.
468 uint64_t OrrImm = replicateChunk(UImm, 2, 0);
469 if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0))
472 // See if we can come up with a constant which can be materialized with
473 // ORR-immediate by replicating element 1 into element 3.
474 OrrImm = replicateChunk(UImm, 0, 2);
475 if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2))
480 // Check for identical 16-bit chunks within the constant and if so materialize
481 // them with a single ORR instruction. The remaining one or two 16-bit chunks
482 // will be materialized with MOVK instructions.
483 if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII))
486 // Check whether the constant contains a sequence of contiguous ones, which
487 // might be interrupted by one or two chunks. If so, materialize the sequence
488 // of contiguous ones with an ORR instruction. Materialize the chunks which
489 // are either interrupting the sequence or outside of the sequence with a
491 if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
494 // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
495 // more MOVK instructions to insert additional 16-bit portions into the
499 // Use MOVN to materialize the high bits if we have more all one chunks
500 // than all zero chunks.
501 if (OneChunks > ZeroChunks) {
508 Imm &= (1LL << 32) - 1;
509 FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
511 FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
513 unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN
514 unsigned LastShift = 0; // LSL amount for last MOVK
516 unsigned LZ = countLeadingZeros(Imm);
517 unsigned TZ = countTrailingZeros(Imm);
518 Shift = ((63 - LZ) / 16) * 16;
519 LastShift = (TZ / 16) * 16;
521 unsigned Imm16 = (Imm >> Shift) & Mask;
522 unsigned DstReg = MI.getOperand(0).getReg();
523 bool DstIsDead = MI.getOperand(0).isDead();
524 MachineInstrBuilder MIB1 =
525 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
526 .addReg(DstReg, RegState::Define |
527 getDeadRegState(DstIsDead && Shift == LastShift))
529 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
531 // If a MOVN was used for the high bits of a negative value, flip the rest
532 // of the bits back for use with MOVK.
536 if (Shift == LastShift) {
537 transferImpOps(MI, MIB1, MIB1);
538 MI.eraseFromParent();
542 MachineInstrBuilder MIB2;
543 unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
544 while (Shift != LastShift) {
546 Imm16 = (Imm >> Shift) & Mask;
547 if (Imm16 == (isNeg ? Mask : 0))
548 continue; // This 16-bit portion is already set correctly.
549 MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
552 getDeadRegState(DstIsDead && Shift == LastShift))
555 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
558 transferImpOps(MI, MIB1, MIB2);
559 MI.eraseFromParent();
563 /// \brief If MBBI references a pseudo instruction that should be expanded here,
564 /// do the expansion and return true. Otherwise return false.
565 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
566 MachineBasicBlock::iterator MBBI) {
567 MachineInstr &MI = *MBBI;
568 unsigned Opcode = MI.getOpcode();
573 case AArch64::ADDWrr:
574 case AArch64::SUBWrr:
575 case AArch64::ADDXrr:
576 case AArch64::SUBXrr:
577 case AArch64::ADDSWrr:
578 case AArch64::SUBSWrr:
579 case AArch64::ADDSXrr:
580 case AArch64::SUBSXrr:
581 case AArch64::ANDWrr:
582 case AArch64::ANDXrr:
583 case AArch64::BICWrr:
584 case AArch64::BICXrr:
585 case AArch64::ANDSWrr:
586 case AArch64::ANDSXrr:
587 case AArch64::BICSWrr:
588 case AArch64::BICSXrr:
589 case AArch64::EONWrr:
590 case AArch64::EONXrr:
591 case AArch64::EORWrr:
592 case AArch64::EORXrr:
593 case AArch64::ORNWrr:
594 case AArch64::ORNXrr:
595 case AArch64::ORRWrr:
596 case AArch64::ORRXrr: {
598 switch (MI.getOpcode()) {
601 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
602 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
603 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
604 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
605 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
606 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
607 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
608 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
609 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
610 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
611 case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
612 case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
613 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
614 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
615 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
616 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
617 case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
618 case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
619 case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
620 case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
621 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
622 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
623 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
624 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
626 MachineInstrBuilder MIB1 =
627 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
628 MI.getOperand(0).getReg())
629 .addOperand(MI.getOperand(1))
630 .addOperand(MI.getOperand(2))
631 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
632 transferImpOps(MI, MIB1, MIB1);
633 MI.eraseFromParent();
637 case AArch64::FCVTSHpseudo: {
638 MachineOperand Src = MI.getOperand(1);
641 TII->getRegisterInfo().getSubReg(Src.getReg(), AArch64::hsub);
642 auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::FCVTSHr))
643 .addOperand(MI.getOperand(0))
644 .addReg(SrcH, RegState::Undef)
646 transferImpOps(MI, MIB, MIB);
647 MI.eraseFromParent();
650 case AArch64::LOADgot: {
651 // Expand into ADRP + LDR.
652 unsigned DstReg = MI.getOperand(0).getReg();
653 const MachineOperand &MO1 = MI.getOperand(1);
654 unsigned Flags = MO1.getTargetFlags();
655 MachineInstrBuilder MIB1 =
656 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
657 MachineInstrBuilder MIB2 =
658 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
659 .addOperand(MI.getOperand(0))
662 if (MO1.isGlobal()) {
663 MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
664 MIB2.addGlobalAddress(MO1.getGlobal(), 0,
665 Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
666 } else if (MO1.isSymbol()) {
667 MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
668 MIB2.addExternalSymbol(MO1.getSymbolName(),
669 Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
671 assert(MO1.isCPI() &&
672 "Only expect globals, externalsymbols, or constant pools");
673 MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
674 Flags | AArch64II::MO_PAGE);
675 MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
676 Flags | AArch64II::MO_PAGEOFF |
680 transferImpOps(MI, MIB1, MIB2);
681 MI.eraseFromParent();
685 case AArch64::MOVaddr:
686 case AArch64::MOVaddrJT:
687 case AArch64::MOVaddrCP:
688 case AArch64::MOVaddrBA:
689 case AArch64::MOVaddrTLS:
690 case AArch64::MOVaddrEXT: {
691 // Expand into ADRP + ADD.
692 unsigned DstReg = MI.getOperand(0).getReg();
693 MachineInstrBuilder MIB1 =
694 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
695 .addOperand(MI.getOperand(1));
697 MachineInstrBuilder MIB2 =
698 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
699 .addOperand(MI.getOperand(0))
701 .addOperand(MI.getOperand(2))
704 transferImpOps(MI, MIB1, MIB2);
705 MI.eraseFromParent();
709 case AArch64::MOVi32imm:
710 return expandMOVImm(MBB, MBBI, 32);
711 case AArch64::MOVi64imm:
712 return expandMOVImm(MBB, MBBI, 64);
713 case AArch64::RET_ReallyLR:
714 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
715 .addReg(AArch64::LR);
716 MI.eraseFromParent();
722 /// \brief Iterate over the instructions in basic block MBB and expand any
723 /// pseudo instructions. Return true if anything was modified.
724 bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
725 bool Modified = false;
727 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
729 MachineBasicBlock::iterator NMBBI = std::next(MBBI);
730 Modified |= expandMI(MBB, MBBI);
737 bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
738 TII = static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
740 bool Modified = false;
742 Modified |= expandMBB(MBB);
746 /// \brief Returns an instance of the pseudo instruction expansion pass.
747 FunctionPass *llvm::createAArch64ExpandPseudoPass() {
748 return new AArch64ExpandPseudo();