1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the ARM target.
12 //===----------------------------------------------------------------------===//
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMTargetMachine.h"
17 #include "MCTargetDesc/ARMAddressingModes.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/IR/CallingConv.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Compiler.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Target/TargetLowering.h"
36 #include "llvm/Target/TargetOptions.h"
40 #define DEBUG_TYPE "arm-isel"
43 DisableShifterOp("disable-shifter-op", cl::Hidden,
44 cl::desc("Disable isel of shifter-op"),
48 CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
49 cl::desc("Check fp vmla / vmls hazard at isel time"),
52 //===--------------------------------------------------------------------===//
53 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
54 /// instructions for SelectionDAG operations.
59 AM2_BASE, // Simple AM2 (+-imm12)
60 AM2_SHOP // Shifter-op AM2
63 class ARMDAGToDAGISel : public SelectionDAGISel {
64 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
65 /// make the right decision when generating code for different targets.
66 const ARMSubtarget *Subtarget;
69 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
70 : SelectionDAGISel(tm, OptLevel) {}
72 bool runOnMachineFunction(MachineFunction &MF) override {
73 // Reset the subtarget each time through.
74 Subtarget = &MF.getSubtarget<ARMSubtarget>();
75 SelectionDAGISel::runOnMachineFunction(MF);
79 const char *getPassName() const override {
80 return "ARM Instruction Selection";
83 void PreprocessISelDAG() override;
85 /// getI32Imm - Return a target constant of type i32 with the specified
87 inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
88 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
91 SDNode *Select(SDNode *N) override;
94 bool hasNoVMLxHazardUse(SDNode *N) const;
95 bool isShifterOpProfitable(const SDValue &Shift,
96 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
97 bool SelectRegShifterOperand(SDValue N, SDValue &A,
98 SDValue &B, SDValue &C,
99 bool CheckProfitability = true);
100 bool SelectImmShifterOperand(SDValue N, SDValue &A,
101 SDValue &B, bool CheckProfitability = true);
102 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
103 SDValue &B, SDValue &C) {
104 // Don't apply the profitability check
105 return SelectRegShifterOperand(N, A, B, C, false);
107 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
109 // Don't apply the profitability check
110 return SelectImmShifterOperand(N, A, B, false);
113 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
114 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
116 AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
117 SDValue &Offset, SDValue &Opc);
118 bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
120 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
123 bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
125 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
128 bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
130 SelectAddrMode2Worker(N, Base, Offset, Opc);
131 // return SelectAddrMode2ShOp(N, Base, Offset, Opc);
132 // This always matches one way or another.
136 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
137 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
138 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
139 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
143 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
144 SDValue &Offset, SDValue &Opc);
145 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
146 SDValue &Offset, SDValue &Opc);
147 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
148 SDValue &Offset, SDValue &Opc);
149 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
150 bool SelectAddrMode3(SDValue N, SDValue &Base,
151 SDValue &Offset, SDValue &Opc);
152 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
153 SDValue &Offset, SDValue &Opc);
154 bool SelectAddrMode5(SDValue N, SDValue &Base,
156 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
157 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
159 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
161 // Thumb Addressing Modes:
162 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
163 bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
165 bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
166 bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
167 bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
168 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
170 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
172 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
174 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
176 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
178 // Thumb 2 Addressing Modes:
179 bool SelectT2ShifterOperandReg(SDValue N,
180 SDValue &BaseReg, SDValue &Opc);
181 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
182 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
184 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
186 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
187 SDValue &OffReg, SDValue &ShImm);
188 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
190 inline bool is_so_imm(unsigned Imm) const {
191 return ARM_AM::getSOImmVal(Imm) != -1;
194 inline bool is_so_imm_not(unsigned Imm) const {
195 return ARM_AM::getSOImmVal(~Imm) != -1;
198 inline bool is_t2_so_imm(unsigned Imm) const {
199 return ARM_AM::getT2SOImmVal(Imm) != -1;
202 inline bool is_t2_so_imm_not(unsigned Imm) const {
203 return ARM_AM::getT2SOImmVal(~Imm) != -1;
206 // Include the pieces autogenerated from the target description.
207 #include "ARMGenDAGISel.inc"
210 /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
212 SDNode *SelectARMIndexedLoad(SDNode *N);
213 SDNode *SelectT2IndexedLoad(SDNode *N);
215 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
216 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
217 /// loads of D registers and even subregs and odd subregs of Q registers.
218 /// For NumVecs <= 2, QOpcodes1 is not used.
219 SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
220 const uint16_t *DOpcodes,
221 const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
223 /// SelectVST - Select NEON store intrinsics. NumVecs should
224 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
225 /// stores of D registers and even subregs and odd subregs of Q registers.
226 /// For NumVecs <= 2, QOpcodes1 is not used.
227 SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
228 const uint16_t *DOpcodes,
229 const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
231 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
232 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
233 /// load/store of D registers and Q registers.
234 SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
235 bool isUpdating, unsigned NumVecs,
236 const uint16_t *DOpcodes, const uint16_t *QOpcodes);
238 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
239 /// should be 2, 3 or 4. The opcode array specifies the instructions used
240 /// for loading D registers. (Q registers are not supported.)
241 SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
242 const uint16_t *Opcodes);
244 /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
245 /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
246 /// generated to force the table registers to be consecutive.
247 SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
249 /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
250 SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
252 // Select special operations if node forms integer ABS pattern
253 SDNode *SelectABSOp(SDNode *N);
255 SDNode *SelectReadRegister(SDNode *N);
256 SDNode *SelectWriteRegister(SDNode *N);
258 SDNode *SelectInlineAsm(SDNode *N);
260 SDNode *SelectConcatVector(SDNode *N);
262 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
263 /// inline asm expressions.
264 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
265 std::vector<SDValue> &OutOps) override;
267 // Form pairs of consecutive R, S, D, or Q registers.
268 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
269 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
270 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
271 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
273 // Form sequences of 4 consecutive S, D, or Q registers.
274 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
275 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
276 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
278 // Get the alignment operand for a NEON VLD or VST instruction.
279 SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
284 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
285 /// operand. If so Imm will receive the 32-bit value.
286 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
287 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
288 Imm = cast<ConstantSDNode>(N)->getZExtValue();
294 // isInt32Immediate - This method tests to see if a constant operand.
295 // If so Imm will receive the 32 bit value.
296 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
297 return isInt32Immediate(N.getNode(), Imm);
300 // isOpcWithIntImmediate - This method tests to see if the node is a specific
301 // opcode and that it has a immediate integer right operand.
302 // If so Imm will receive the 32 bit value.
303 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
304 return N->getOpcode() == Opc &&
305 isInt32Immediate(N->getOperand(1).getNode(), Imm);
308 /// \brief Check whether a particular node is a constant value representable as
309 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
311 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
312 static bool isScaledConstantInRange(SDValue Node, int Scale,
313 int RangeMin, int RangeMax,
314 int &ScaledConstant) {
315 assert(Scale > 0 && "Invalid scale!");
317 // Check that this is a constant.
318 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
322 ScaledConstant = (int) C->getZExtValue();
323 if ((ScaledConstant % Scale) != 0)
326 ScaledConstant /= Scale;
327 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
330 void ARMDAGToDAGISel::PreprocessISelDAG() {
331 if (!Subtarget->hasV6T2Ops())
334 bool isThumb2 = Subtarget->isThumb();
335 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
336 E = CurDAG->allnodes_end(); I != E; ) {
337 SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
339 if (N->getOpcode() != ISD::ADD)
342 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
343 // leading zeros, followed by consecutive set bits, followed by 1 or 2
344 // trailing zeros, e.g. 1020.
345 // Transform the expression to
346 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
347 // of trailing zeros of c2. The left shift would be folded as an shifter
348 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
351 SDValue N0 = N->getOperand(0);
352 SDValue N1 = N->getOperand(1);
353 unsigned And_imm = 0;
354 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
355 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
361 // Check if the AND mask is an immediate of the form: 000.....1111111100
362 unsigned TZ = countTrailingZeros(And_imm);
363 if (TZ != 1 && TZ != 2)
364 // Be conservative here. Shifter operands aren't always free. e.g. On
365 // Swift, left shifter operand of 1 / 2 for free but others are not.
367 // ubfx r3, r1, #16, #8
368 // ldr.w r3, [r0, r3, lsl #2]
371 // and.w r2, r9, r1, lsr #14
375 if (And_imm & (And_imm + 1))
378 // Look for (and (srl X, c1), c2).
379 SDValue Srl = N1.getOperand(0);
380 unsigned Srl_imm = 0;
381 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
385 // Make sure first operand is not a shifter operand which would prevent
386 // folding of the left shift.
391 if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
394 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
395 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
399 // Now make the transformation.
400 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
402 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
404 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
406 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
407 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
408 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
409 CurDAG->UpdateNodeOperands(N, N0, N1);
413 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
414 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
415 /// least on current ARM implementations) which should be avoidded.
416 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
417 if (OptLevel == CodeGenOpt::None)
420 if (!CheckVMLxHazard)
423 if (!Subtarget->isCortexA7() && !Subtarget->isCortexA8() &&
424 !Subtarget->isCortexA9() && !Subtarget->isSwift())
430 SDNode *Use = *N->use_begin();
431 if (Use->getOpcode() == ISD::CopyToReg)
433 if (Use->isMachineOpcode()) {
434 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
435 CurDAG->getSubtarget().getInstrInfo());
437 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
440 unsigned Opcode = MCID.getOpcode();
441 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
443 // vmlx feeding into another vmlx. We actually want to unfold
444 // the use later in the MLxExpansion pass. e.g.
446 // vmla (stall 8 cycles)
451 // This adds up to about 18 - 19 cycles.
454 // vmul (stall 4 cycles)
455 // vadd adds up to about 14 cycles.
456 return TII->isFpMLxInstruction(Opcode);
462 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
463 ARM_AM::ShiftOpc ShOpcVal,
465 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
467 if (Shift.hasOneUse())
470 return ShOpcVal == ARM_AM::lsl &&
471 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
474 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
477 bool CheckProfitability) {
478 if (DisableShifterOp)
481 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
483 // Don't match base register only case. That is matched to a separate
484 // lower complexity pattern with explicit register operand.
485 if (ShOpcVal == ARM_AM::no_shift) return false;
487 BaseReg = N.getOperand(0);
488 unsigned ShImmVal = 0;
489 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
490 if (!RHS) return false;
491 ShImmVal = RHS->getZExtValue() & 31;
492 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
497 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
501 bool CheckProfitability) {
502 if (DisableShifterOp)
505 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
507 // Don't match base register only case. That is matched to a separate
508 // lower complexity pattern with explicit register operand.
509 if (ShOpcVal == ARM_AM::no_shift) return false;
511 BaseReg = N.getOperand(0);
512 unsigned ShImmVal = 0;
513 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
514 if (RHS) return false;
516 ShReg = N.getOperand(1);
517 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
519 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
525 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
528 // Match simple R + imm12 operands.
531 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
532 !CurDAG->isBaseWithConstantOffset(N)) {
533 if (N.getOpcode() == ISD::FrameIndex) {
534 // Match frame index.
535 int FI = cast<FrameIndexSDNode>(N)->getIndex();
536 Base = CurDAG->getTargetFrameIndex(
537 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
538 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
542 if (N.getOpcode() == ARMISD::Wrapper &&
543 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
544 Base = N.getOperand(0);
547 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
551 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
552 int RHSC = (int)RHS->getSExtValue();
553 if (N.getOpcode() == ISD::SUB)
556 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
557 Base = N.getOperand(0);
558 if (Base.getOpcode() == ISD::FrameIndex) {
559 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
560 Base = CurDAG->getTargetFrameIndex(
561 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
563 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
570 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
576 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
578 if (N.getOpcode() == ISD::MUL &&
579 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
580 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
581 // X * [3,5,9] -> X + X * [2,4,8] etc.
582 int RHSC = (int)RHS->getZExtValue();
585 ARM_AM::AddrOpc AddSub = ARM_AM::add;
587 AddSub = ARM_AM::sub;
590 if (isPowerOf2_32(RHSC)) {
591 unsigned ShAmt = Log2_32(RHSC);
592 Base = Offset = N.getOperand(0);
593 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
602 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
603 // ISD::OR that is equivalent to an ISD::ADD.
604 !CurDAG->isBaseWithConstantOffset(N))
607 // Leave simple R +/- imm12 operands for LDRi12
608 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
610 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
611 -0x1000+1, 0x1000, RHSC)) // 12 bits.
615 // Otherwise this is R +/- [possibly shifted] R.
616 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
617 ARM_AM::ShiftOpc ShOpcVal =
618 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
621 Base = N.getOperand(0);
622 Offset = N.getOperand(1);
624 if (ShOpcVal != ARM_AM::no_shift) {
625 // Check to see if the RHS of the shift is a constant, if not, we can't fold
627 if (ConstantSDNode *Sh =
628 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
629 ShAmt = Sh->getZExtValue();
630 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
631 Offset = N.getOperand(1).getOperand(0);
634 ShOpcVal = ARM_AM::no_shift;
637 ShOpcVal = ARM_AM::no_shift;
641 // Try matching (R shl C) + (R).
642 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
643 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
644 N.getOperand(0).hasOneUse())) {
645 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
646 if (ShOpcVal != ARM_AM::no_shift) {
647 // Check to see if the RHS of the shift is a constant, if not, we can't
649 if (ConstantSDNode *Sh =
650 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
651 ShAmt = Sh->getZExtValue();
652 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
653 Offset = N.getOperand(0).getOperand(0);
654 Base = N.getOperand(1);
657 ShOpcVal = ARM_AM::no_shift;
660 ShOpcVal = ARM_AM::no_shift;
665 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
673 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
677 if (N.getOpcode() == ISD::MUL &&
678 (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
679 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
680 // X * [3,5,9] -> X + X * [2,4,8] etc.
681 int RHSC = (int)RHS->getZExtValue();
684 ARM_AM::AddrOpc AddSub = ARM_AM::add;
686 AddSub = ARM_AM::sub;
689 if (isPowerOf2_32(RHSC)) {
690 unsigned ShAmt = Log2_32(RHSC);
691 Base = Offset = N.getOperand(0);
692 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
701 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
702 // ISD::OR that is equivalent to an ADD.
703 !CurDAG->isBaseWithConstantOffset(N)) {
705 if (N.getOpcode() == ISD::FrameIndex) {
706 int FI = cast<FrameIndexSDNode>(N)->getIndex();
707 Base = CurDAG->getTargetFrameIndex(
708 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
709 } else if (N.getOpcode() == ARMISD::Wrapper &&
710 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
711 Base = N.getOperand(0);
713 Offset = CurDAG->getRegister(0, MVT::i32);
714 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
720 // Match simple R +/- imm12 operands.
721 if (N.getOpcode() != ISD::SUB) {
723 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
724 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
725 Base = N.getOperand(0);
726 if (Base.getOpcode() == ISD::FrameIndex) {
727 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
728 Base = CurDAG->getTargetFrameIndex(
729 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
731 Offset = CurDAG->getRegister(0, MVT::i32);
733 ARM_AM::AddrOpc AddSub = ARM_AM::add;
735 AddSub = ARM_AM::sub;
738 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
745 if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
746 // Compute R +/- (R << N) and reuse it.
748 Offset = CurDAG->getRegister(0, MVT::i32);
749 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
755 // Otherwise this is R +/- [possibly shifted] R.
756 ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
757 ARM_AM::ShiftOpc ShOpcVal =
758 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
761 Base = N.getOperand(0);
762 Offset = N.getOperand(1);
764 if (ShOpcVal != ARM_AM::no_shift) {
765 // Check to see if the RHS of the shift is a constant, if not, we can't fold
767 if (ConstantSDNode *Sh =
768 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
769 ShAmt = Sh->getZExtValue();
770 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
771 Offset = N.getOperand(1).getOperand(0);
774 ShOpcVal = ARM_AM::no_shift;
777 ShOpcVal = ARM_AM::no_shift;
781 // Try matching (R shl C) + (R).
782 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
783 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
784 N.getOperand(0).hasOneUse())) {
785 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
786 if (ShOpcVal != ARM_AM::no_shift) {
787 // Check to see if the RHS of the shift is a constant, if not, we can't
789 if (ConstantSDNode *Sh =
790 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
791 ShAmt = Sh->getZExtValue();
792 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
793 Offset = N.getOperand(0).getOperand(0);
794 Base = N.getOperand(1);
797 ShOpcVal = ARM_AM::no_shift;
800 ShOpcVal = ARM_AM::no_shift;
805 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
810 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
811 SDValue &Offset, SDValue &Opc) {
812 unsigned Opcode = Op->getOpcode();
813 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
814 ? cast<LoadSDNode>(Op)->getAddressingMode()
815 : cast<StoreSDNode>(Op)->getAddressingMode();
816 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
817 ? ARM_AM::add : ARM_AM::sub;
819 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
823 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
825 if (ShOpcVal != ARM_AM::no_shift) {
826 // Check to see if the RHS of the shift is a constant, if not, we can't fold
828 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
829 ShAmt = Sh->getZExtValue();
830 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
831 Offset = N.getOperand(0);
834 ShOpcVal = ARM_AM::no_shift;
837 ShOpcVal = ARM_AM::no_shift;
841 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
846 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
847 SDValue &Offset, SDValue &Opc) {
848 unsigned Opcode = Op->getOpcode();
849 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
850 ? cast<LoadSDNode>(Op)->getAddressingMode()
851 : cast<StoreSDNode>(Op)->getAddressingMode();
852 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
853 ? ARM_AM::add : ARM_AM::sub;
855 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
856 if (AddSub == ARM_AM::sub) Val *= -1;
857 Offset = CurDAG->getRegister(0, MVT::i32);
858 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
866 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
867 SDValue &Offset, SDValue &Opc) {
868 unsigned Opcode = Op->getOpcode();
869 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
870 ? cast<LoadSDNode>(Op)->getAddressingMode()
871 : cast<StoreSDNode>(Op)->getAddressingMode();
872 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
873 ? ARM_AM::add : ARM_AM::sub;
875 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
876 Offset = CurDAG->getRegister(0, MVT::i32);
877 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
879 SDLoc(Op), MVT::i32);
886 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
891 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
892 SDValue &Base, SDValue &Offset,
894 if (N.getOpcode() == ISD::SUB) {
895 // X - C is canonicalize to X + -C, no need to handle it here.
896 Base = N.getOperand(0);
897 Offset = N.getOperand(1);
898 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
903 if (!CurDAG->isBaseWithConstantOffset(N)) {
905 if (N.getOpcode() == ISD::FrameIndex) {
906 int FI = cast<FrameIndexSDNode>(N)->getIndex();
907 Base = CurDAG->getTargetFrameIndex(
908 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
910 Offset = CurDAG->getRegister(0, MVT::i32);
911 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
916 // If the RHS is +/- imm8, fold into addr mode.
918 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
919 -256 + 1, 256, RHSC)) { // 8 bits.
920 Base = N.getOperand(0);
921 if (Base.getOpcode() == ISD::FrameIndex) {
922 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
923 Base = CurDAG->getTargetFrameIndex(
924 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
926 Offset = CurDAG->getRegister(0, MVT::i32);
928 ARM_AM::AddrOpc AddSub = ARM_AM::add;
930 AddSub = ARM_AM::sub;
933 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
938 Base = N.getOperand(0);
939 Offset = N.getOperand(1);
940 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
945 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
946 SDValue &Offset, SDValue &Opc) {
947 unsigned Opcode = Op->getOpcode();
948 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
949 ? cast<LoadSDNode>(Op)->getAddressingMode()
950 : cast<StoreSDNode>(Op)->getAddressingMode();
951 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
952 ? ARM_AM::add : ARM_AM::sub;
954 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
955 Offset = CurDAG->getRegister(0, MVT::i32);
956 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
962 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
967 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
968 SDValue &Base, SDValue &Offset) {
969 if (!CurDAG->isBaseWithConstantOffset(N)) {
971 if (N.getOpcode() == ISD::FrameIndex) {
972 int FI = cast<FrameIndexSDNode>(N)->getIndex();
973 Base = CurDAG->getTargetFrameIndex(
974 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
975 } else if (N.getOpcode() == ARMISD::Wrapper &&
976 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
977 Base = N.getOperand(0);
979 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
984 // If the RHS is +/- imm8, fold into addr mode.
986 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
987 -256 + 1, 256, RHSC)) {
988 Base = N.getOperand(0);
989 if (Base.getOpcode() == ISD::FrameIndex) {
990 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
991 Base = CurDAG->getTargetFrameIndex(
992 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
995 ARM_AM::AddrOpc AddSub = ARM_AM::add;
997 AddSub = ARM_AM::sub;
1000 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1001 SDLoc(N), MVT::i32);
1006 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1007 SDLoc(N), MVT::i32);
1011 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1015 unsigned Alignment = 0;
1017 MemSDNode *MemN = cast<MemSDNode>(Parent);
1019 if (isa<LSBaseSDNode>(MemN) ||
1020 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1021 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1022 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1023 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1024 // The maximum alignment is equal to the memory size being referenced.
1025 unsigned MMOAlign = MemN->getAlignment();
1026 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1027 if (MMOAlign >= MemSize && MemSize > 1)
1028 Alignment = MemSize;
1030 // All other uses of addrmode6 are for intrinsics. For now just record
1031 // the raw alignment value; it will be refined later based on the legal
1032 // alignment operands for the intrinsic.
1033 Alignment = MemN->getAlignment();
1036 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1040 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1042 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1043 ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1044 if (AM != ISD::POST_INC)
1047 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1048 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1049 Offset = CurDAG->getRegister(0, MVT::i32);
1054 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1055 SDValue &Offset, SDValue &Label) {
1056 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1057 Offset = N.getOperand(0);
1058 SDValue N1 = N.getOperand(1);
1059 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1060 SDLoc(N), MVT::i32);
1068 //===----------------------------------------------------------------------===//
1069 // Thumb Addressing Modes
1070 //===----------------------------------------------------------------------===//
1072 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1073 SDValue &Base, SDValue &Offset){
1074 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1075 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1076 if (!NC || !NC->isNullValue())
1083 Base = N.getOperand(0);
1084 Offset = N.getOperand(1);
1089 ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
1090 SDValue &Offset, unsigned Scale) {
1092 SDValue TmpBase, TmpOffImm;
1093 if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
1094 return false; // We want to select tLDRspi / tSTRspi instead.
1096 if (N.getOpcode() == ARMISD::Wrapper &&
1097 N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
1098 return false; // We want to select tLDRpci instead.
1101 if (!CurDAG->isBaseWithConstantOffset(N))
1104 // Thumb does not have [sp, r] address mode.
1105 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1106 RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
1107 if ((LHSR && LHSR->getReg() == ARM::SP) ||
1108 (RHSR && RHSR->getReg() == ARM::SP))
1111 // FIXME: Why do we explicitly check for a match here and then return false?
1112 // Presumably to allow something else to match, but shouldn't this be
1115 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
1118 Base = N.getOperand(0);
1119 Offset = N.getOperand(1);
1124 ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
1127 return SelectThumbAddrModeRI(N, Base, Offset, 1);
1131 ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
1134 return SelectThumbAddrModeRI(N, Base, Offset, 2);
1138 ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
1141 return SelectThumbAddrModeRI(N, Base, Offset, 4);
1145 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1146 SDValue &Base, SDValue &OffImm) {
1148 SDValue TmpBase, TmpOffImm;
1149 if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
1150 return false; // We want to select tLDRspi / tSTRspi instead.
1152 if (N.getOpcode() == ARMISD::Wrapper &&
1153 N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
1154 return false; // We want to select tLDRpci instead.
1157 if (!CurDAG->isBaseWithConstantOffset(N)) {
1158 if (N.getOpcode() == ARMISD::Wrapper &&
1159 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
1160 Base = N.getOperand(0);
1165 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1169 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1170 RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
1171 if ((LHSR && LHSR->getReg() == ARM::SP) ||
1172 (RHSR && RHSR->getReg() == ARM::SP)) {
1173 ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
1174 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1175 unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
1176 unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
1178 // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
1179 if (LHSC != 0 || RHSC != 0) return false;
1182 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1186 // If the RHS is + imm5 * scale, fold into addr mode.
1188 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1189 Base = N.getOperand(0);
1190 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1194 Base = N.getOperand(0);
1195 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1200 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1202 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1206 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1208 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1212 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1214 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1217 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1218 SDValue &Base, SDValue &OffImm) {
1219 if (N.getOpcode() == ISD::FrameIndex) {
1220 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1221 // Only multiples of 4 are allowed for the offset, so the frame object
1222 // alignment must be at least 4.
1223 MachineFrameInfo *MFI = MF->getFrameInfo();
1224 if (MFI->getObjectAlignment(FI) < 4)
1225 MFI->setObjectAlignment(FI, 4);
1226 Base = CurDAG->getTargetFrameIndex(
1227 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1228 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1232 if (!CurDAG->isBaseWithConstantOffset(N))
1235 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1236 if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1237 (LHSR && LHSR->getReg() == ARM::SP)) {
1238 // If the RHS is + imm8 * scale, fold into addr mode.
1240 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1241 Base = N.getOperand(0);
1242 if (Base.getOpcode() == ISD::FrameIndex) {
1243 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1244 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1245 // indexed by the LHS must be 4-byte aligned.
1246 MachineFrameInfo *MFI = MF->getFrameInfo();
1247 if (MFI->getObjectAlignment(FI) < 4)
1248 MFI->setObjectAlignment(FI, 4);
1249 Base = CurDAG->getTargetFrameIndex(
1250 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1252 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1261 //===----------------------------------------------------------------------===//
1262 // Thumb 2 Addressing Modes
1263 //===----------------------------------------------------------------------===//
1266 bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
1268 if (DisableShifterOp)
1271 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
1273 // Don't match base register only case. That is matched to a separate
1274 // lower complexity pattern with explicit register operand.
1275 if (ShOpcVal == ARM_AM::no_shift) return false;
1277 BaseReg = N.getOperand(0);
1278 unsigned ShImmVal = 0;
1279 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1280 ShImmVal = RHS->getZExtValue() & 31;
1281 Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), SDLoc(N));
1288 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1289 SDValue &Base, SDValue &OffImm) {
1290 // Match simple R + imm12 operands.
1293 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1294 !CurDAG->isBaseWithConstantOffset(N)) {
1295 if (N.getOpcode() == ISD::FrameIndex) {
1296 // Match frame index.
1297 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1298 Base = CurDAG->getTargetFrameIndex(
1299 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1300 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1304 if (N.getOpcode() == ARMISD::Wrapper &&
1305 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
1306 Base = N.getOperand(0);
1307 if (Base.getOpcode() == ISD::TargetConstantPool)
1308 return false; // We want to select t2LDRpci instead.
1311 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1315 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1316 if (SelectT2AddrModeImm8(N, Base, OffImm))
1317 // Let t2LDRi8 handle (R - imm8).
1320 int RHSC = (int)RHS->getZExtValue();
1321 if (N.getOpcode() == ISD::SUB)
1324 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1325 Base = N.getOperand(0);
1326 if (Base.getOpcode() == ISD::FrameIndex) {
1327 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1328 Base = CurDAG->getTargetFrameIndex(
1329 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1331 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1338 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1342 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1343 SDValue &Base, SDValue &OffImm) {
1344 // Match simple R - imm8 operands.
1345 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1346 !CurDAG->isBaseWithConstantOffset(N))
1349 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1350 int RHSC = (int)RHS->getSExtValue();
1351 if (N.getOpcode() == ISD::SUB)
1354 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1355 Base = N.getOperand(0);
1356 if (Base.getOpcode() == ISD::FrameIndex) {
1357 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1358 Base = CurDAG->getTargetFrameIndex(
1359 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1361 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1369 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1371 unsigned Opcode = Op->getOpcode();
1372 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1373 ? cast<LoadSDNode>(Op)->getAddressingMode()
1374 : cast<StoreSDNode>(Op)->getAddressingMode();
1376 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1377 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1378 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1379 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1386 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1388 SDValue &OffReg, SDValue &ShImm) {
1389 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1390 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1393 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1394 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1395 int RHSC = (int)RHS->getZExtValue();
1396 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1398 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1402 // Look for (R + R) or (R + (R << [1,2,3])).
1404 Base = N.getOperand(0);
1405 OffReg = N.getOperand(1);
1407 // Swap if it is ((R << c) + R).
1408 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1409 if (ShOpcVal != ARM_AM::lsl) {
1410 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1411 if (ShOpcVal == ARM_AM::lsl)
1412 std::swap(Base, OffReg);
1415 if (ShOpcVal == ARM_AM::lsl) {
1416 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1418 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1419 ShAmt = Sh->getZExtValue();
1420 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1421 OffReg = OffReg.getOperand(0);
1428 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1433 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1435 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1438 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1440 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1443 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1447 uint32_t RHSC = (int)RHS->getZExtValue();
1448 if (RHSC > 1020 || RHSC % 4 != 0)
1451 Base = N.getOperand(0);
1452 if (Base.getOpcode() == ISD::FrameIndex) {
1453 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1454 Base = CurDAG->getTargetFrameIndex(
1455 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1458 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1462 //===--------------------------------------------------------------------===//
1464 /// getAL - Returns a ARMCC::AL immediate node.
1465 static inline SDValue getAL(SelectionDAG *CurDAG, SDLoc dl) {
1466 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1469 SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
1470 LoadSDNode *LD = cast<LoadSDNode>(N);
1471 ISD::MemIndexedMode AM = LD->getAddressingMode();
1472 if (AM == ISD::UNINDEXED)
1475 EVT LoadedVT = LD->getMemoryVT();
1476 SDValue Offset, AMOpc;
1477 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1478 unsigned Opcode = 0;
1480 if (LoadedVT == MVT::i32 && isPre &&
1481 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1482 Opcode = ARM::LDR_PRE_IMM;
1484 } else if (LoadedVT == MVT::i32 && !isPre &&
1485 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1486 Opcode = ARM::LDR_POST_IMM;
1488 } else if (LoadedVT == MVT::i32 &&
1489 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1490 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1493 } else if (LoadedVT == MVT::i16 &&
1494 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1496 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1497 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1498 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1499 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1500 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1501 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1503 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1507 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1509 Opcode = ARM::LDRB_PRE_IMM;
1510 } else if (!isPre &&
1511 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1513 Opcode = ARM::LDRB_POST_IMM;
1514 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1516 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1522 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1523 SDValue Chain = LD->getChain();
1524 SDValue Base = LD->getBasePtr();
1525 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1526 CurDAG->getRegister(0, MVT::i32), Chain };
1527 return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1528 MVT::i32, MVT::Other, Ops);
1530 SDValue Chain = LD->getChain();
1531 SDValue Base = LD->getBasePtr();
1532 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1533 CurDAG->getRegister(0, MVT::i32), Chain };
1534 return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1535 MVT::i32, MVT::Other, Ops);
1542 SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
1543 LoadSDNode *LD = cast<LoadSDNode>(N);
1544 ISD::MemIndexedMode AM = LD->getAddressingMode();
1545 if (AM == ISD::UNINDEXED)
1548 EVT LoadedVT = LD->getMemoryVT();
1549 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1551 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1552 unsigned Opcode = 0;
1554 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1555 switch (LoadedVT.getSimpleVT().SimpleTy) {
1557 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1561 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1563 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1568 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1570 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1579 SDValue Chain = LD->getChain();
1580 SDValue Base = LD->getBasePtr();
1581 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1582 CurDAG->getRegister(0, MVT::i32), Chain };
1583 return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1590 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1591 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1592 SDLoc dl(V0.getNode());
1594 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1595 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1596 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1597 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1598 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1601 /// \brief Form a D register from a pair of S registers.
1602 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1603 SDLoc dl(V0.getNode());
1605 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1606 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1607 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1608 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1609 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1612 /// \brief Form a quad register from a pair of D registers.
1613 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1614 SDLoc dl(V0.getNode());
1615 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1617 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1618 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1619 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1620 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1623 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1624 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1625 SDLoc dl(V0.getNode());
1626 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1628 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1629 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1630 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1631 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1634 /// \brief Form 4 consecutive S registers.
1635 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1636 SDValue V2, SDValue V3) {
1637 SDLoc dl(V0.getNode());
1639 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1640 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1641 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1642 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1643 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1644 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1645 V2, SubReg2, V3, SubReg3 };
1646 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1649 /// \brief Form 4 consecutive D registers.
1650 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1651 SDValue V2, SDValue V3) {
1652 SDLoc dl(V0.getNode());
1653 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1655 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1656 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1657 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1658 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1659 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1660 V2, SubReg2, V3, SubReg3 };
1661 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1664 /// \brief Form 4 consecutive Q registers.
1665 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1666 SDValue V2, SDValue V3) {
1667 SDLoc dl(V0.getNode());
1668 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1670 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1671 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1672 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1673 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1674 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1675 V2, SubReg2, V3, SubReg3 };
1676 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1679 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1680 /// of a NEON VLD or VST instruction. The supported values depend on the
1681 /// number of registers being loaded.
1682 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, SDLoc dl,
1683 unsigned NumVecs, bool is64BitVector) {
1684 unsigned NumRegs = NumVecs;
1685 if (!is64BitVector && NumVecs < 3)
1688 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1689 if (Alignment >= 32 && NumRegs == 4)
1691 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1693 else if (Alignment >= 8)
1698 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1701 static bool isVLDfixed(unsigned Opc)
1704 default: return false;
1705 case ARM::VLD1d8wb_fixed : return true;
1706 case ARM::VLD1d16wb_fixed : return true;
1707 case ARM::VLD1d64Qwb_fixed : return true;
1708 case ARM::VLD1d32wb_fixed : return true;
1709 case ARM::VLD1d64wb_fixed : return true;
1710 case ARM::VLD1d64TPseudoWB_fixed : return true;
1711 case ARM::VLD1d64QPseudoWB_fixed : return true;
1712 case ARM::VLD1q8wb_fixed : return true;
1713 case ARM::VLD1q16wb_fixed : return true;
1714 case ARM::VLD1q32wb_fixed : return true;
1715 case ARM::VLD1q64wb_fixed : return true;
1716 case ARM::VLD2d8wb_fixed : return true;
1717 case ARM::VLD2d16wb_fixed : return true;
1718 case ARM::VLD2d32wb_fixed : return true;
1719 case ARM::VLD2q8PseudoWB_fixed : return true;
1720 case ARM::VLD2q16PseudoWB_fixed : return true;
1721 case ARM::VLD2q32PseudoWB_fixed : return true;
1722 case ARM::VLD2DUPd8wb_fixed : return true;
1723 case ARM::VLD2DUPd16wb_fixed : return true;
1724 case ARM::VLD2DUPd32wb_fixed : return true;
1728 static bool isVSTfixed(unsigned Opc)
1731 default: return false;
1732 case ARM::VST1d8wb_fixed : return true;
1733 case ARM::VST1d16wb_fixed : return true;
1734 case ARM::VST1d32wb_fixed : return true;
1735 case ARM::VST1d64wb_fixed : return true;
1736 case ARM::VST1q8wb_fixed : return true;
1737 case ARM::VST1q16wb_fixed : return true;
1738 case ARM::VST1q32wb_fixed : return true;
1739 case ARM::VST1q64wb_fixed : return true;
1740 case ARM::VST1d64TPseudoWB_fixed : return true;
1741 case ARM::VST1d64QPseudoWB_fixed : return true;
1742 case ARM::VST2d8wb_fixed : return true;
1743 case ARM::VST2d16wb_fixed : return true;
1744 case ARM::VST2d32wb_fixed : return true;
1745 case ARM::VST2q8PseudoWB_fixed : return true;
1746 case ARM::VST2q16PseudoWB_fixed : return true;
1747 case ARM::VST2q32PseudoWB_fixed : return true;
1751 // Get the register stride update opcode of a VLD/VST instruction that
1752 // is otherwise equivalent to the given fixed stride updating instruction.
1753 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1754 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1755 && "Incorrect fixed stride updating instruction.");
1758 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1759 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1760 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1761 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1762 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1763 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1764 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1765 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1766 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1767 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1768 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1769 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1771 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1772 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1773 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1774 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1775 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1776 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1777 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1778 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1779 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1780 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1782 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1783 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1784 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1785 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1786 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1787 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1789 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1790 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1791 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1792 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1793 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1794 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1796 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1797 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1798 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1800 return Opc; // If not one we handle, return it unchanged.
1803 SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1804 const uint16_t *DOpcodes,
1805 const uint16_t *QOpcodes0,
1806 const uint16_t *QOpcodes1) {
1807 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1810 SDValue MemAddr, Align;
1811 unsigned AddrOpIdx = isUpdating ? 1 : 2;
1812 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1815 SDValue Chain = N->getOperand(0);
1816 EVT VT = N->getValueType(0);
1817 bool is64BitVector = VT.is64BitVector();
1818 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1820 unsigned OpcodeIndex;
1821 switch (VT.getSimpleVT().SimpleTy) {
1822 default: llvm_unreachable("unhandled vld type");
1823 // Double-register operations:
1824 case MVT::v8i8: OpcodeIndex = 0; break;
1825 case MVT::v4i16: OpcodeIndex = 1; break;
1827 case MVT::v2i32: OpcodeIndex = 2; break;
1828 case MVT::v1i64: OpcodeIndex = 3; break;
1829 // Quad-register operations:
1830 case MVT::v16i8: OpcodeIndex = 0; break;
1831 case MVT::v8i16: OpcodeIndex = 1; break;
1833 case MVT::v4i32: OpcodeIndex = 2; break;
1835 case MVT::v2i64: OpcodeIndex = 3;
1836 assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1844 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1847 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1849 std::vector<EVT> ResTys;
1850 ResTys.push_back(ResTy);
1852 ResTys.push_back(MVT::i32);
1853 ResTys.push_back(MVT::Other);
1855 SDValue Pred = getAL(CurDAG, dl);
1856 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1858 SmallVector<SDValue, 7> Ops;
1860 // Double registers and VLD1/VLD2 quad registers are directly supported.
1861 if (is64BitVector || NumVecs <= 2) {
1862 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1863 QOpcodes0[OpcodeIndex]);
1864 Ops.push_back(MemAddr);
1865 Ops.push_back(Align);
1867 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1868 // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1869 // case entirely when the rest are updated to that form, too.
1870 if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
1871 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1872 // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1873 // check for that explicitly too. Horribly hacky, but temporary.
1874 if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
1875 !isa<ConstantSDNode>(Inc.getNode()))
1876 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1878 Ops.push_back(Pred);
1879 Ops.push_back(Reg0);
1880 Ops.push_back(Chain);
1881 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1884 // Otherwise, quad registers are loaded with two separate instructions,
1885 // where one loads the even registers and the other loads the odd registers.
1886 EVT AddrTy = MemAddr.getValueType();
1888 // Load the even subregs. This is always an updating load, so that it
1889 // provides the address to the second load for the odd subregs.
1891 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1892 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1893 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1894 ResTy, AddrTy, MVT::Other, OpsA);
1895 Chain = SDValue(VLdA, 2);
1897 // Load the odd subregs.
1898 Ops.push_back(SDValue(VLdA, 1));
1899 Ops.push_back(Align);
1901 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1902 assert(isa<ConstantSDNode>(Inc.getNode()) &&
1903 "only constant post-increment update allowed for VLD3/4");
1905 Ops.push_back(Reg0);
1907 Ops.push_back(SDValue(VLdA, 0));
1908 Ops.push_back(Pred);
1909 Ops.push_back(Reg0);
1910 Ops.push_back(Chain);
1911 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1914 // Transfer memoperands.
1915 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1916 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1917 cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1922 // Extract out the subregisters.
1923 SDValue SuperReg = SDValue(VLd, 0);
1924 assert(ARM::dsub_7 == ARM::dsub_0+7 &&
1925 ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
1926 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1927 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1928 ReplaceUses(SDValue(N, Vec),
1929 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1930 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1932 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
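// Select a VST1/VST2/VST3/VST4 intrinsic, optionally with address write-back.
// The source vectors are first glued into a single super-register with a
// REG_SEQUENCE; quad-register VST3/VST4 are again split into an updating store
// of the even D subregisters followed by a store of the odd ones.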
1936 SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1937 const uint16_t *DOpcodes,
1938 const uint16_t *QOpcodes0,
1939 const uint16_t *QOpcodes1) {
1940 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1943 SDValue MemAddr, Align;
1944 unsigned AddrOpIdx = isUpdating ? 1 : 2;
1945 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1946 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1949 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1950 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1952 SDValue Chain = N->getOperand(0);
1953 EVT VT = N->getOperand(Vec0Idx).getValueType();
1954 bool is64BitVector = VT.is64BitVector();
1955 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1957 unsigned OpcodeIndex;
1958 switch (VT.getSimpleVT().SimpleTy) {
1959 default: llvm_unreachable("unhandled vst type");
1960 // Double-register operations:
1961 case MVT::v8i8: OpcodeIndex = 0; break;
1962 case MVT::v4i16: OpcodeIndex = 1; break;
1964 case MVT::v2i32: OpcodeIndex = 2; break;
1965 case MVT::v1i64: OpcodeIndex = 3; break;
1966 // Quad-register operations:
1967 case MVT::v16i8: OpcodeIndex = 0; break;
1968 case MVT::v8i16: OpcodeIndex = 1; break;
1970 case MVT::v4i32: OpcodeIndex = 2; break;
1972 case MVT::v2i64: OpcodeIndex = 3;
1973 assert(NumVecs == 1 && "v2i64 type only supported for VST1");
1977 std::vector<EVT> ResTys;
1979 ResTys.push_back(MVT::i32);
1980 ResTys.push_back(MVT::Other);
1982 SDValue Pred = getAL(CurDAG, dl);
1983 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1984 SmallVector<SDValue, 7> Ops;
1986 // Double registers and VST1/VST2 quad registers are directly supported.
1987 if (is64BitVector || NumVecs <= 2) {
1990 SrcReg = N->getOperand(Vec0Idx);
1991 } else if (is64BitVector) {
1992 // Form a REG_SEQUENCE to force register allocation.
1993 SDValue V0 = N->getOperand(Vec0Idx + 0);
1994 SDValue V1 = N->getOperand(Vec0Idx + 1);
1996 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
1998 SDValue V2 = N->getOperand(Vec0Idx + 2);
1999 // If it's a vst3, form a quad D-register and leave the last part as an undef.
2001 SDValue V3 = (NumVecs == 3)
2002 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2003 : N->getOperand(Vec0Idx + 3);
2004 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2007 // Form a QQ register.
2008 SDValue Q0 = N->getOperand(Vec0Idx);
2009 SDValue Q1 = N->getOperand(Vec0Idx + 1);
2010 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2013 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2014 QOpcodes0[OpcodeIndex]);
2015 Ops.push_back(MemAddr);
2016 Ops.push_back(Align);
2018 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2019 // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2020 // case entirely when the rest are updated to that form, too.
2021 if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2022 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2023 // FIXME: We use a VST1 for v1i64 even if the pseudo says vst2/3/4, so
2024 // check for that explicitly too. Horribly hacky, but temporary.
2025 if (!isa<ConstantSDNode>(Inc.getNode()))
2027 else if (NumVecs > 2 && !isVSTfixed(Opc))
2028 Ops.push_back(Reg0);
2030 Ops.push_back(SrcReg);
2031 Ops.push_back(Pred);
2032 Ops.push_back(Reg0);
2033 Ops.push_back(Chain);
2034 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2036 // Transfer memoperands.
2037 cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2042 // Otherwise, quad registers are stored with two separate instructions,
2043 // where one stores the even registers and the other stores the odd registers.
2045 // Form the QQQQ REG_SEQUENCE.
2046 SDValue V0 = N->getOperand(Vec0Idx + 0);
2047 SDValue V1 = N->getOperand(Vec0Idx + 1);
2048 SDValue V2 = N->getOperand(Vec0Idx + 2);
2049 SDValue V3 = (NumVecs == 3)
2050 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2051 : N->getOperand(Vec0Idx + 3);
2052 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2054 // Store the even D registers. This is always an updating store, so that it
2055 // provides the address to the second store for the odd subregs.
2056 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2057 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2058 MemAddr.getValueType(),
2060 cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
2061 Chain = SDValue(VStA, 1);
2063 // Store the odd D registers.
2064 Ops.push_back(SDValue(VStA, 0));
2065 Ops.push_back(Align);
2067 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2068 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2069 "only constant post-increment update allowed for VST3/4");
2071 Ops.push_back(Reg0);
2073 Ops.push_back(RegSeq);
2074 Ops.push_back(Pred);
2075 Ops.push_back(Reg0);
2076 Ops.push_back(Chain);
2077 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2079 cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
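// Select a single-lane VLDn/VSTn (vldN.lane / vstN.lane). The vectors involved
// are packed into one super-register, the lane number is passed as an explicit
// immediate operand, and for loads the results are extracted back out with
// subregister extracts.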
2083 SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
2084 bool isUpdating, unsigned NumVecs,
2085 const uint16_t *DOpcodes,
2086 const uint16_t *QOpcodes) {
2087 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2090 SDValue MemAddr, Align;
2091 unsigned AddrOpIdx = isUpdating ? 1 : 2;
2092 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2093 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2096 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2097 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2099 SDValue Chain = N->getOperand(0);
2101 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2102 EVT VT = N->getOperand(Vec0Idx).getValueType();
2103 bool is64BitVector = VT.is64BitVector();
2105 unsigned Alignment = 0;
2107 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2108 unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2109 if (Alignment > NumBytes)
2110 Alignment = NumBytes;
2111 if (Alignment < 8 && Alignment < NumBytes)
2113 // Alignment must be a power of two; make sure of that.
2114 Alignment = (Alignment & -Alignment);
2118 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2120 unsigned OpcodeIndex;
2121 switch (VT.getSimpleVT().SimpleTy) {
2122 default: llvm_unreachable("unhandled vld/vst lane type");
2123 // Double-register operations:
2124 case MVT::v8i8: OpcodeIndex = 0; break;
2125 case MVT::v4i16: OpcodeIndex = 1; break;
2127 case MVT::v2i32: OpcodeIndex = 2; break;
2128 // Quad-register operations:
2129 case MVT::v8i16: OpcodeIndex = 0; break;
2131 case MVT::v4i32: OpcodeIndex = 1; break;
2134 std::vector<EVT> ResTys;
2136 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2139 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2140 MVT::i64, ResTyElts));
2143 ResTys.push_back(MVT::i32);
2144 ResTys.push_back(MVT::Other);
2146 SDValue Pred = getAL(CurDAG, dl);
2147 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2149 SmallVector<SDValue, 8> Ops;
2150 Ops.push_back(MemAddr);
2151 Ops.push_back(Align);
2153 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2154 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
2158 SDValue V0 = N->getOperand(Vec0Idx + 0);
2159 SDValue V1 = N->getOperand(Vec0Idx + 1);
2162 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2164 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2166 SDValue V2 = N->getOperand(Vec0Idx + 2);
2167 SDValue V3 = (NumVecs == 3)
2168 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2169 : N->getOperand(Vec0Idx + 3);
2171 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2173 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2175 Ops.push_back(SuperReg);
2176 Ops.push_back(getI32Imm(Lane, dl));
2177 Ops.push_back(Pred);
2178 Ops.push_back(Reg0);
2179 Ops.push_back(Chain);
2181 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2182 QOpcodes[OpcodeIndex]);
2183 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2184 cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2188 // Extract the subregisters.
2189 SuperReg = SDValue(VLdLn, 0);
2190 assert(ARM::dsub_7 == ARM::dsub_0+7 &&
2191 ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
2192 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2193 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2194 ReplaceUses(SDValue(N, Vec),
2195 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2196 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2198 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
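// Select a VLDnDUP node: load one element and replicate it into every lane of
// NumVecs registers. The opcode tables passed in are D-register forms, so the
// result vectors are always extracted from dsub_0 upward.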
2202 SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
2204 const uint16_t *Opcodes) {
2205 assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2208 SDValue MemAddr, Align;
2209 if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2212 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2213 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2215 SDValue Chain = N->getOperand(0);
2216 EVT VT = N->getValueType(0);
2218 unsigned Alignment = 0;
2220 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2221 unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2222 if (Alignment > NumBytes)
2223 Alignment = NumBytes;
2224 if (Alignment < 8 && Alignment < NumBytes)
2226 // Alignment must be a power of two; make sure of that.
2227 Alignment = (Alignment & -Alignment);
2231 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2233 unsigned OpcodeIndex;
2234 switch (VT.getSimpleVT().SimpleTy) {
2235 default: llvm_unreachable("unhandled vld-dup type");
2236 case MVT::v8i8: OpcodeIndex = 0; break;
2237 case MVT::v4i16: OpcodeIndex = 1; break;
2239 case MVT::v2i32: OpcodeIndex = 2; break;
2242 SDValue Pred = getAL(CurDAG, dl);
2243 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2245 unsigned Opc = Opcodes[OpcodeIndex];
2246 SmallVector<SDValue, 6> Ops;
2247 Ops.push_back(MemAddr);
2248 Ops.push_back(Align);
2250 // fixed-stride update instructions don't have an explicit writeback
2251 // operand. It's implicit in the opcode itself.
2252 SDValue Inc = N->getOperand(2);
2253 if (!isa<ConstantSDNode>(Inc.getNode()))
2255 // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2256 else if (NumVecs > 2)
2257 Ops.push_back(Reg0);
2259 Ops.push_back(Pred);
2260 Ops.push_back(Reg0);
2261 Ops.push_back(Chain);
2263 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2264 std::vector<EVT> ResTys;
2265 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2267 ResTys.push_back(MVT::i32);
2268 ResTys.push_back(MVT::Other);
2269 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2270 cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2271 SuperReg = SDValue(VLdDup, 0);
2273 // Extract the subregisters.
2274 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2275 unsigned SubIdx = ARM::dsub_0;
2276 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2277 ReplaceUses(SDValue(N, Vec),
2278 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2279 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2281 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
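// Select a VTBL/VTBX table-lookup node. The table registers are packed into a
// D-register pair or quad with a REG_SEQUENCE before being passed to the
// VTBLn/VTBXn (pseudo-)instruction together with the index vector and the
// usual predicate operands.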
2285 SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
2287 assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
2289 EVT VT = N->getValueType(0);
2290 unsigned FirstTblReg = IsExt ? 2 : 1;
2292 // Form a REG_SEQUENCE to force register allocation.
2294 SDValue V0 = N->getOperand(FirstTblReg + 0);
2295 SDValue V1 = N->getOperand(FirstTblReg + 1);
2297 RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
2299 SDValue V2 = N->getOperand(FirstTblReg + 2);
2300 // If it's a vtbl3, form a quad D-register and leave the last part as undef.
2302 SDValue V3 = (NumVecs == 3)
2303 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2304 : N->getOperand(FirstTblReg + 3);
2305 RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2308 SmallVector<SDValue, 6> Ops;
2310 Ops.push_back(N->getOperand(1));
2311 Ops.push_back(RegSeq);
2312 Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2313 Ops.push_back(getAL(CurDAG, dl)); // predicate
2314 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2315 return CurDAG->getMachineNode(Opc, dl, VT, Ops);
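// Try to match N as a v6T2 bitfield extract (SBFX/UBFX, or t2SBFX/t2UBFX in
// Thumb2). Three shapes are recognised: an AND with a low-bits mask of a right
// shift, a left shift followed by a right shift, and a sign_extend_inreg of a
// right shift. When the extracted field reaches the top of the register, a
// plain shift is cheaper and is used instead.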
2318 SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
2320 if (!Subtarget->hasV6T2Ops())
2323 unsigned Opc = isSigned
2324 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2325 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2328 // For unsigned extracts, check for a shift right and mask
2329 unsigned And_imm = 0;
2330 if (N->getOpcode() == ISD::AND) {
2331 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2333 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2334 if (And_imm & (And_imm + 1))
2337 unsigned Srl_imm = 0;
2338 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2340 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2342 // Note: The width operand is encoded as width-1.
2343 unsigned Width = countTrailingOnes(And_imm) - 1;
2344 unsigned LSB = Srl_imm;
2346 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2348 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2349 // It's cheaper to use a right shift to extract the top bits.
2350 if (Subtarget->isThumb()) {
2351 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2352 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2353 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2354 getAL(CurDAG, dl), Reg0, Reg0 };
2355 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2358 // ARM models shift instructions as MOVsi with shifter operand.
2359 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2361 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2363 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2364 getAL(CurDAG, dl), Reg0, Reg0 };
2365 return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2368 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2369 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2370 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2371 getAL(CurDAG, dl), Reg0 };
2372 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2378 // Otherwise, we're looking for a shift of a shift
2379 unsigned Shl_imm = 0;
2380 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2381 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2382 unsigned Srl_imm = 0;
2383 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2384 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2385 // Note: The width operand is encoded as width-1.
2386 unsigned Width = 32 - Srl_imm - 1;
2387 int LSB = Srl_imm - Shl_imm;
2390 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2391 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2392 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2393 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2394 getAL(CurDAG, dl), Reg0 };
2395 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2399 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2400 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2402 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2403 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2406 if (LSB + Width > 32)
2409 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2410 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2411 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2412 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2413 getAL(CurDAG, dl), Reg0 };
2414 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2420 /// Target-specific DAG combining for ISD::XOR.
2421 /// Target-independent combining lowers SELECT_CC nodes of the form
2422 /// select_cc setg[ge] X, 0, X, -X
2423 /// select_cc setgt X, -1, X, -X
2424 /// select_cc setl[te] X, 0, -X, X
2425 /// select_cc setlt X, 1, -X, X
2426 /// which represent Integer ABS into:
2427 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2428 /// ARM instruction selection detects the latter form and matches it to an
2429 /// ARM::ABS or ARM::t2ABS machine node.
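/// For a 32-bit X this instantiates to the familiar branch-free sequence
///   Y = sra(X, 31);  abs(X) = xor(add(X, Y), Y)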
2430 SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
2431 SDValue XORSrc0 = N->getOperand(0);
2432 SDValue XORSrc1 = N->getOperand(1);
2433 EVT VT = N->getValueType(0);
2435 if (Subtarget->isThumb1Only())
2438 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2441 SDValue ADDSrc0 = XORSrc0.getOperand(0);
2442 SDValue ADDSrc1 = XORSrc0.getOperand(1);
2443 SDValue SRASrc0 = XORSrc1.getOperand(0);
2444 SDValue SRASrc1 = XORSrc1.getOperand(1);
2445 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2446 EVT XType = SRASrc0.getValueType();
2447 unsigned Size = XType.getSizeInBits() - 1;
2449 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2450 XType.isInteger() && SRAConstant != nullptr &&
2451 Size == SRAConstant->getZExtValue()) {
2452 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2453 return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2459 SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2460 // The only time a CONCAT_VECTORS operation can have legal types is when
2461 // two 64-bit vectors are concatenated to a 128-bit vector.
2462 EVT VT = N->getValueType(0);
2463 if (!VT.is128BitVector() || N->getNumOperands() != 2)
2464 llvm_unreachable("unexpected CONCAT_VECTORS");
2465 return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
2468 SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
2471 if (N->isMachineOpcode()) {
2473 return nullptr; // Already selected.
2476 switch (N->getOpcode()) {
2478 case ISD::WRITE_REGISTER: {
2479 SDNode *ResNode = SelectWriteRegister(N);
2484 case ISD::READ_REGISTER: {
2485 SDNode *ResNode = SelectReadRegister(N);
2490 case ISD::INLINEASM: {
2491 SDNode *ResNode = SelectInlineAsm(N);
2497 // Select special operations if XOR node forms integer ABS pattern
2498 SDNode *ResNode = SelectABSOp(N);
2501 // Other cases are autogenerated.
2504 case ISD::Constant: {
2505 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2507 if (Subtarget->useMovt(*MF))
2508 // Thumb2-aware targets have the MOVT instruction, so all immediates can
2509 // be done with MOV + MOVT, at worst.
2512 if (Subtarget->isThumb()) {
2513 UseCP = (Val > 255 && // MOV
2514 ~Val > 255 && // MOV + MVN
2515 !ARM_AM::isThumbImmShiftedVal(Val) && // MOV + LSL
2516 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
2518 UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
2519 ARM_AM::getSOImmVal(~Val) == -1 && // MVN
2520 !ARM_AM::isSOImmTwoPartVal(Val) && // two instrs.
2521 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
2525 SDValue CPIdx = CurDAG->getTargetConstantPool(
2526 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2527 TLI->getPointerTy(CurDAG->getDataLayout()));
2530 if (Subtarget->isThumb()) {
2531 SDValue Pred = getAL(CurDAG, dl);
2532 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2533 SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2534 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2539 CurDAG->getTargetConstant(0, dl, MVT::i32),
2541 CurDAG->getRegister(0, MVT::i32),
2542 CurDAG->getEntryNode()
2544 ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2547 ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
2551 // Other cases are autogenerated.
2554 case ISD::FrameIndex: {
2555 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2556 int FI = cast<FrameIndexSDNode>(N)->getIndex();
2557 SDValue TFI = CurDAG->getTargetFrameIndex(
2558 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2559 if (Subtarget->isThumb1Only()) {
2560 // Set the alignment of the frame object to 4, to avoid having to generate
2561 // more than one ADD
2562 MachineFrameInfo *MFI = MF->getFrameInfo();
2563 if (MFI->getObjectAlignment(FI) < 4)
2564 MFI->setObjectAlignment(FI, 4);
2565 return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2566 CurDAG->getTargetConstant(0, dl, MVT::i32));
2568 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2569 ARM::t2ADDri : ARM::ADDri);
2570 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2571 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2572 CurDAG->getRegister(0, MVT::i32) };
2573 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2577 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2580 case ISD::SIGN_EXTEND_INREG:
2582 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
2586 if (Subtarget->isThumb1Only())
2588 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2589 unsigned RHSV = C->getZExtValue();
2591 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2592 unsigned ShImm = Log2_32(RHSV-1);
2595 SDValue V = N->getOperand(0);
2596 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2597 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2598 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2599 if (Subtarget->isThumb()) {
2600 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2601 return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2603 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2605 return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2608 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2609 unsigned ShImm = Log2_32(RHSV+1);
2612 SDValue V = N->getOperand(0);
2613 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2614 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2615 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2616 if (Subtarget->isThumb()) {
2617 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2618 return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2620 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2622 return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2628 // Check for unsigned bitfield extract
2629 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2632 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2633 // of c1 are 0xffff, and lower 16-bits of c2 are 0. That is, the top 16-bits
2634 // are entirely contributed by c2 and lower 16-bits are entirely contributed
2635 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2636 // Select it to: "movt x, ((c1 & 0xffff0000) >> 16)"
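    // For example, (and (or x, 0xAAAA0000), 0xAAAAFFFF) is selected to
    // "movt x, #0xAAAA", leaving the low 16 bits of x unchanged.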
2637 EVT VT = N->getValueType(0);
2640 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2642 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2645 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2646 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2649 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2650 SDValue N2 = N0.getOperand(1);
2651 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2654 unsigned N1CVal = N1C->getZExtValue();
2655 unsigned N2CVal = N2C->getZExtValue();
2656 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2657 (N1CVal & 0xffffU) == 0xffffU &&
2658 (N2CVal & 0xffffU) == 0x0U) {
2659 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2661 SDValue Ops[] = { N0.getOperand(0), Imm16,
2662 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2663 return CurDAG->getMachineNode(Opc, dl, VT, Ops);
2668 case ARMISD::VMOVRRD:
2669 return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
2670 N->getOperand(0), getAL(CurDAG, dl),
2671 CurDAG->getRegister(0, MVT::i32));
2672 case ISD::UMUL_LOHI: {
2673 if (Subtarget->isThumb1Only())
2675 if (Subtarget->isThumb()) {
2676 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2677 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2678 return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
2680 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2681 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2682 CurDAG->getRegister(0, MVT::i32) };
2683 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2684 ARM::UMULL : ARM::UMULLv5,
2685 dl, MVT::i32, MVT::i32, Ops);
2688 case ISD::SMUL_LOHI: {
2689 if (Subtarget->isThumb1Only())
2691 if (Subtarget->isThumb()) {
2692 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2693 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2694 return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
2696 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2697 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2698 CurDAG->getRegister(0, MVT::i32) };
2699 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2700 ARM::SMULL : ARM::SMULLv5,
2701 dl, MVT::i32, MVT::i32, Ops);
2704 case ARMISD::UMLAL:{
2705 if (Subtarget->isThumb()) {
2706 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2707 N->getOperand(3), getAL(CurDAG, dl),
2708 CurDAG->getRegister(0, MVT::i32)};
2709 return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
2711 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2712 N->getOperand(3), getAL(CurDAG, dl),
2713 CurDAG->getRegister(0, MVT::i32),
2714 CurDAG->getRegister(0, MVT::i32) };
2715 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2716 ARM::UMLAL : ARM::UMLALv5,
2717 dl, MVT::i32, MVT::i32, Ops);
2720 case ARMISD::SMLAL:{
2721 if (Subtarget->isThumb()) {
2722 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2723 N->getOperand(3), getAL(CurDAG, dl),
2724 CurDAG->getRegister(0, MVT::i32)};
2725 return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
2727 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2728 N->getOperand(3), getAL(CurDAG, dl),
2729 CurDAG->getRegister(0, MVT::i32),
2730 CurDAG->getRegister(0, MVT::i32) };
2731 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2732 ARM::SMLAL : ARM::SMLALv5,
2733 dl, MVT::i32, MVT::i32, Ops);
2737 SDNode *ResNode = nullptr;
2738 if (Subtarget->isThumb() && Subtarget->hasThumb2())
2739 ResNode = SelectT2IndexedLoad(N);
2741 ResNode = SelectARMIndexedLoad(N);
2744 // Other cases are autogenerated.
2747 case ARMISD::BRCOND: {
2748 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2749 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2750 // Pattern complexity = 6 cost = 1 size = 0
2752 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2753 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2754 // Pattern complexity = 6 cost = 1 size = 0
2756 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2757 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2758 // Pattern complexity = 6 cost = 1 size = 0
2760 unsigned Opc = Subtarget->isThumb() ?
2761 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2762 SDValue Chain = N->getOperand(0);
2763 SDValue N1 = N->getOperand(1);
2764 SDValue N2 = N->getOperand(2);
2765 SDValue N3 = N->getOperand(3);
2766 SDValue InFlag = N->getOperand(4);
2767 assert(N1.getOpcode() == ISD::BasicBlock);
2768 assert(N2.getOpcode() == ISD::Constant);
2769 assert(N3.getOpcode() == ISD::Register);
2771 SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
2772 cast<ConstantSDNode>(N2)->getZExtValue()), dl,
2774 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
2775 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
2777 Chain = SDValue(ResNode, 0);
2778 if (N->getNumValues() == 2) {
2779 InFlag = SDValue(ResNode, 1);
2780 ReplaceUses(SDValue(N, 1), InFlag);
2782 ReplaceUses(SDValue(N, 0),
2783 SDValue(Chain.getNode(), Chain.getResNo()));
2786 case ARMISD::VZIP: {
2788 EVT VT = N->getValueType(0);
2789 switch (VT.getSimpleVT().SimpleTy) {
2790 default: return nullptr;
2791 case MVT::v8i8: Opc = ARM::VZIPd8; break;
2792 case MVT::v4i16: Opc = ARM::VZIPd16; break;
2794 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2795 case MVT::v2i32: Opc = ARM::VTRNd32; break;
2796 case MVT::v16i8: Opc = ARM::VZIPq8; break;
2797 case MVT::v8i16: Opc = ARM::VZIPq16; break;
2799 case MVT::v4i32: Opc = ARM::VZIPq32; break;
2801 SDValue Pred = getAL(CurDAG, dl);
2802 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2803 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2804 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2806 case ARMISD::VUZP: {
2808 EVT VT = N->getValueType(0);
2809 switch (VT.getSimpleVT().SimpleTy) {
2810 default: return nullptr;
2811 case MVT::v8i8: Opc = ARM::VUZPd8; break;
2812 case MVT::v4i16: Opc = ARM::VUZPd16; break;
2814 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2815 case MVT::v2i32: Opc = ARM::VTRNd32; break;
2816 case MVT::v16i8: Opc = ARM::VUZPq8; break;
2817 case MVT::v8i16: Opc = ARM::VUZPq16; break;
2819 case MVT::v4i32: Opc = ARM::VUZPq32; break;
2821 SDValue Pred = getAL(CurDAG, dl);
2822 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2823 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2824 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2826 case ARMISD::VTRN: {
2828 EVT VT = N->getValueType(0);
2829 switch (VT.getSimpleVT().SimpleTy) {
2830 default: return nullptr;
2831 case MVT::v8i8: Opc = ARM::VTRNd8; break;
2832 case MVT::v4i16: Opc = ARM::VTRNd16; break;
2834 case MVT::v2i32: Opc = ARM::VTRNd32; break;
2835 case MVT::v16i8: Opc = ARM::VTRNq8; break;
2836 case MVT::v8i16: Opc = ARM::VTRNq16; break;
2838 case MVT::v4i32: Opc = ARM::VTRNq32; break;
2840 SDValue Pred = getAL(CurDAG, dl);
2841 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2842 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2843 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2845 case ARMISD::BUILD_VECTOR: {
2846 EVT VecVT = N->getValueType(0);
2847 EVT EltVT = VecVT.getVectorElementType();
2848 unsigned NumElts = VecVT.getVectorNumElements();
2849 if (EltVT == MVT::f64) {
2850 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
2851 return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2853 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
2855 return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2856 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
2857 return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
2858 N->getOperand(2), N->getOperand(3));
2861 case ARMISD::VLD2DUP: {
2862 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
2864 return SelectVLDDup(N, false, 2, Opcodes);
2867 case ARMISD::VLD3DUP: {
2868 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
2869 ARM::VLD3DUPd16Pseudo,
2870 ARM::VLD3DUPd32Pseudo };
2871 return SelectVLDDup(N, false, 3, Opcodes);
2874 case ARMISD::VLD4DUP: {
2875 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
2876 ARM::VLD4DUPd16Pseudo,
2877 ARM::VLD4DUPd32Pseudo };
2878 return SelectVLDDup(N, false, 4, Opcodes);
2881 case ARMISD::VLD2DUP_UPD: {
2882 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
2883 ARM::VLD2DUPd16wb_fixed,
2884 ARM::VLD2DUPd32wb_fixed };
2885 return SelectVLDDup(N, true, 2, Opcodes);
2888 case ARMISD::VLD3DUP_UPD: {
2889 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
2890 ARM::VLD3DUPd16Pseudo_UPD,
2891 ARM::VLD3DUPd32Pseudo_UPD };
2892 return SelectVLDDup(N, true, 3, Opcodes);
2895 case ARMISD::VLD4DUP_UPD: {
2896 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
2897 ARM::VLD4DUPd16Pseudo_UPD,
2898 ARM::VLD4DUPd32Pseudo_UPD };
2899 return SelectVLDDup(N, true, 4, Opcodes);
2902 case ARMISD::VLD1_UPD: {
2903 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
2904 ARM::VLD1d16wb_fixed,
2905 ARM::VLD1d32wb_fixed,
2906 ARM::VLD1d64wb_fixed };
2907 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
2908 ARM::VLD1q16wb_fixed,
2909 ARM::VLD1q32wb_fixed,
2910 ARM::VLD1q64wb_fixed };
2911 return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
2914 case ARMISD::VLD2_UPD: {
2915 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
2916 ARM::VLD2d16wb_fixed,
2917 ARM::VLD2d32wb_fixed,
2918 ARM::VLD1q64wb_fixed};
2919 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
2920 ARM::VLD2q16PseudoWB_fixed,
2921 ARM::VLD2q32PseudoWB_fixed };
2922 return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
2925 case ARMISD::VLD3_UPD: {
2926 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
2927 ARM::VLD3d16Pseudo_UPD,
2928 ARM::VLD3d32Pseudo_UPD,
2929 ARM::VLD1d64TPseudoWB_fixed};
2930 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
2931 ARM::VLD3q16Pseudo_UPD,
2932 ARM::VLD3q32Pseudo_UPD };
2933 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
2934 ARM::VLD3q16oddPseudo_UPD,
2935 ARM::VLD3q32oddPseudo_UPD };
2936 return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
2939 case ARMISD::VLD4_UPD: {
2940 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
2941 ARM::VLD4d16Pseudo_UPD,
2942 ARM::VLD4d32Pseudo_UPD,
2943 ARM::VLD1d64QPseudoWB_fixed};
2944 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
2945 ARM::VLD4q16Pseudo_UPD,
2946 ARM::VLD4q32Pseudo_UPD };
2947 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
2948 ARM::VLD4q16oddPseudo_UPD,
2949 ARM::VLD4q32oddPseudo_UPD };
2950 return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
2953 case ARMISD::VLD2LN_UPD: {
2954 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
2955 ARM::VLD2LNd16Pseudo_UPD,
2956 ARM::VLD2LNd32Pseudo_UPD };
2957 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
2958 ARM::VLD2LNq32Pseudo_UPD };
2959 return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
2962 case ARMISD::VLD3LN_UPD: {
2963 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
2964 ARM::VLD3LNd16Pseudo_UPD,
2965 ARM::VLD3LNd32Pseudo_UPD };
2966 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
2967 ARM::VLD3LNq32Pseudo_UPD };
2968 return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
2971 case ARMISD::VLD4LN_UPD: {
2972 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
2973 ARM::VLD4LNd16Pseudo_UPD,
2974 ARM::VLD4LNd32Pseudo_UPD };
2975 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
2976 ARM::VLD4LNq32Pseudo_UPD };
2977 return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
2980 case ARMISD::VST1_UPD: {
2981 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
2982 ARM::VST1d16wb_fixed,
2983 ARM::VST1d32wb_fixed,
2984 ARM::VST1d64wb_fixed };
2985 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
2986 ARM::VST1q16wb_fixed,
2987 ARM::VST1q32wb_fixed,
2988 ARM::VST1q64wb_fixed };
2989 return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
2992 case ARMISD::VST2_UPD: {
2993 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
2994 ARM::VST2d16wb_fixed,
2995 ARM::VST2d32wb_fixed,
2996 ARM::VST1q64wb_fixed};
2997 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
2998 ARM::VST2q16PseudoWB_fixed,
2999 ARM::VST2q32PseudoWB_fixed };
3000 return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3003 case ARMISD::VST3_UPD: {
3004 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3005 ARM::VST3d16Pseudo_UPD,
3006 ARM::VST3d32Pseudo_UPD,
3007 ARM::VST1d64TPseudoWB_fixed};
3008 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3009 ARM::VST3q16Pseudo_UPD,
3010 ARM::VST3q32Pseudo_UPD };
3011 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3012 ARM::VST3q16oddPseudo_UPD,
3013 ARM::VST3q32oddPseudo_UPD };
3014 return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3017 case ARMISD::VST4_UPD: {
3018 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3019 ARM::VST4d16Pseudo_UPD,
3020 ARM::VST4d32Pseudo_UPD,
3021 ARM::VST1d64QPseudoWB_fixed};
3022 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3023 ARM::VST4q16Pseudo_UPD,
3024 ARM::VST4q32Pseudo_UPD };
3025 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3026 ARM::VST4q16oddPseudo_UPD,
3027 ARM::VST4q32oddPseudo_UPD };
3028 return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3031 case ARMISD::VST2LN_UPD: {
3032 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3033 ARM::VST2LNd16Pseudo_UPD,
3034 ARM::VST2LNd32Pseudo_UPD };
3035 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3036 ARM::VST2LNq32Pseudo_UPD };
3037 return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3040 case ARMISD::VST3LN_UPD: {
3041 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3042 ARM::VST3LNd16Pseudo_UPD,
3043 ARM::VST3LNd32Pseudo_UPD };
3044 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3045 ARM::VST3LNq32Pseudo_UPD };
3046 return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3049 case ARMISD::VST4LN_UPD: {
3050 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3051 ARM::VST4LNd16Pseudo_UPD,
3052 ARM::VST4LNd32Pseudo_UPD };
3053 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3054 ARM::VST4LNq32Pseudo_UPD };
3055 return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3058 case ISD::INTRINSIC_VOID:
3059 case ISD::INTRINSIC_W_CHAIN: {
3060 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3065 case Intrinsic::arm_ldaexd:
3066 case Intrinsic::arm_ldrexd: {
3068 SDValue Chain = N->getOperand(0);
3069 SDValue MemAddr = N->getOperand(2);
3070 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3072 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3073 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3074 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3076 // arm_ldrexd returns an i64 value in {i32, i32}
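      // In Thumb2 mode t2LDREXD/t2LDAEXD produce two separate i32 results; the
      // ARM-mode LDREXD/LDAEXD forms produce a single Untyped GPRPair result
      // that is split into its halves with EXTRACT_SUBREG (gsub_0/gsub_1) below.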
3077 std::vector<EVT> ResTys;
3079 ResTys.push_back(MVT::i32);
3080 ResTys.push_back(MVT::i32);
3082 ResTys.push_back(MVT::Untyped);
3083 ResTys.push_back(MVT::Other);
3085 // Place arguments in the right order.
3086 SmallVector<SDValue, 7> Ops;
3087 Ops.push_back(MemAddr);
3088 Ops.push_back(getAL(CurDAG, dl));
3089 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3090 Ops.push_back(Chain);
3091 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3092 // Transfer memoperands.
3093 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3094 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3095 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3098 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3099 if (!SDValue(N, 0).use_empty()) {
3102 Result = SDValue(Ld, 0);
3105 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3106 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3107 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3108 Result = SDValue(ResNode,0);
3110 ReplaceUses(SDValue(N, 0), Result);
3112 if (!SDValue(N, 1).use_empty()) {
3115 Result = SDValue(Ld, 1);
3118 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3119 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3120 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3121 Result = SDValue(ResNode,0);
3123 ReplaceUses(SDValue(N, 1), Result);
3125 ReplaceUses(SDValue(N, 2), OutChain);
3128 case Intrinsic::arm_stlexd:
3129 case Intrinsic::arm_strexd: {
3131 SDValue Chain = N->getOperand(0);
3132 SDValue Val0 = N->getOperand(2);
3133 SDValue Val1 = N->getOperand(3);
3134 SDValue MemAddr = N->getOperand(4);
3136 // Store exclusive double returns an i32 value which is the return status
3137 // of the issued store.
3138 const EVT ResTys[] = {MVT::i32, MVT::Other};
3140 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3141 // Place arguments in the right order.
3142 SmallVector<SDValue, 7> Ops;
3144 Ops.push_back(Val0);
3145 Ops.push_back(Val1);
3147 // arm_strexd uses GPRPair.
3148 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3149 Ops.push_back(MemAddr);
3150 Ops.push_back(getAL(CurDAG, dl));
3151 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3152 Ops.push_back(Chain);
3154 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3155 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3156 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3158 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3159 // Transfer memoperands.
3160 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3161 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3162 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3167 case Intrinsic::arm_neon_vld1: {
3168 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3169 ARM::VLD1d32, ARM::VLD1d64 };
3170 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3171 ARM::VLD1q32, ARM::VLD1q64};
3172 return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3175 case Intrinsic::arm_neon_vld2: {
3176 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3177 ARM::VLD2d32, ARM::VLD1q64 };
3178 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3179 ARM::VLD2q32Pseudo };
3180 return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3183 case Intrinsic::arm_neon_vld3: {
3184 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3187 ARM::VLD1d64TPseudo };
3188 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3189 ARM::VLD3q16Pseudo_UPD,
3190 ARM::VLD3q32Pseudo_UPD };
3191 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3192 ARM::VLD3q16oddPseudo,
3193 ARM::VLD3q32oddPseudo };
3194 return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3197 case Intrinsic::arm_neon_vld4: {
3198 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3201 ARM::VLD1d64QPseudo };
3202 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3203 ARM::VLD4q16Pseudo_UPD,
3204 ARM::VLD4q32Pseudo_UPD };
3205 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3206 ARM::VLD4q16oddPseudo,
3207 ARM::VLD4q32oddPseudo };
3208 return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3211 case Intrinsic::arm_neon_vld2lane: {
3212 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3213 ARM::VLD2LNd16Pseudo,
3214 ARM::VLD2LNd32Pseudo };
3215 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3216 ARM::VLD2LNq32Pseudo };
3217 return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3220 case Intrinsic::arm_neon_vld3lane: {
3221 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3222 ARM::VLD3LNd16Pseudo,
3223 ARM::VLD3LNd32Pseudo };
3224 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3225 ARM::VLD3LNq32Pseudo };
3226 return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3229 case Intrinsic::arm_neon_vld4lane: {
3230 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3231 ARM::VLD4LNd16Pseudo,
3232 ARM::VLD4LNd32Pseudo };
3233 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3234 ARM::VLD4LNq32Pseudo };
3235 return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3238 case Intrinsic::arm_neon_vst1: {
3239 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3240 ARM::VST1d32, ARM::VST1d64 };
3241 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3242 ARM::VST1q32, ARM::VST1q64 };
3243 return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3246 case Intrinsic::arm_neon_vst2: {
3247 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3248 ARM::VST2d32, ARM::VST1q64 };
3249 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3250 ARM::VST2q32Pseudo };
3251 return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3254 case Intrinsic::arm_neon_vst3: {
3255 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3258 ARM::VST1d64TPseudo };
3259 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3260 ARM::VST3q16Pseudo_UPD,
3261 ARM::VST3q32Pseudo_UPD };
3262 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3263 ARM::VST3q16oddPseudo,
3264 ARM::VST3q32oddPseudo };
3265 return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3268 case Intrinsic::arm_neon_vst4: {
3269 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3272 ARM::VST1d64QPseudo };
3273 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3274 ARM::VST4q16Pseudo_UPD,
3275 ARM::VST4q32Pseudo_UPD };
3276 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3277 ARM::VST4q16oddPseudo,
3278 ARM::VST4q32oddPseudo };
3279 return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3282 case Intrinsic::arm_neon_vst2lane: {
3283 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3284 ARM::VST2LNd16Pseudo,
3285 ARM::VST2LNd32Pseudo };
3286 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3287 ARM::VST2LNq32Pseudo };
3288 return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3291 case Intrinsic::arm_neon_vst3lane: {
3292 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3293 ARM::VST3LNd16Pseudo,
3294 ARM::VST3LNd32Pseudo };
3295 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3296 ARM::VST3LNq32Pseudo };
3297 return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3300 case Intrinsic::arm_neon_vst4lane: {
3301 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3302 ARM::VST4LNd16Pseudo,
3303 ARM::VST4LNd32Pseudo };
3304 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3305 ARM::VST4LNq32Pseudo };
3306 return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3312 case ISD::INTRINSIC_WO_CHAIN: {
3313 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3318 case Intrinsic::arm_neon_vtbl2:
3319 return SelectVTBL(N, false, 2, ARM::VTBL2);
3320 case Intrinsic::arm_neon_vtbl3:
3321 return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3322 case Intrinsic::arm_neon_vtbl4:
3323 return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3325 case Intrinsic::arm_neon_vtbx2:
3326 return SelectVTBL(N, true, 2, ARM::VTBX2);
3327 case Intrinsic::arm_neon_vtbx3:
3328 return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3329 case Intrinsic::arm_neon_vtbx4:
3330 return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3335 case ARMISD::VTBL1: {
3337 EVT VT = N->getValueType(0);
3338 SmallVector<SDValue, 6> Ops;
3340 Ops.push_back(N->getOperand(0));
3341 Ops.push_back(N->getOperand(1));
3342 Ops.push_back(getAL(CurDAG, dl)); // Predicate
3343 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3344 return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
3346 case ARMISD::VTBL2: {
3348 EVT VT = N->getValueType(0);
3350 // Form a REG_SEQUENCE to force register allocation.
3351 SDValue V0 = N->getOperand(0);
3352 SDValue V1 = N->getOperand(1);
3353 SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
3355 SmallVector<SDValue, 6> Ops;
3356 Ops.push_back(RegSeq);
3357 Ops.push_back(N->getOperand(2));
3358 Ops.push_back(getAL(CurDAG, dl)); // Predicate
3359 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3360 return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
3363 case ISD::CONCAT_VECTORS:
3364 return SelectConcatVector(N);
3367 return SelectCode(N);
3370 // Inspect a register string of the form
3371 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32 bit) or
3372 // cp<coprocessor>:<opc1>:c<CRm> (64 bit), extract the integer fields from
3373 // it, and append them as target constants to the provided operand vector.
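// For example, "cp15:0:c13:c0:3" yields the five i32 target constants
// {15, 0, 13, 0, 3}.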
3375 static void getIntOperandsFromRegisterString(StringRef RegString,
3376 SelectionDAG *CurDAG, SDLoc DL,
3377 std::vector<SDValue>& Ops) {
3378 SmallVector<StringRef, 5> Fields;
3379 RegString.split(Fields, ":");
3381 if (Fields.size() > 1) {
3382 bool AllIntFields = true;
3384 for (StringRef Field : Fields) {
3385 // Need to trim out leading 'cp' characters and get the integer field.
3387 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3388 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3391 assert(AllIntFields &&
3392 "Unexpected non-integer value in special register string.");
3396 // Maps a Banked Register string to its mask value. The mask value returned is
3397 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3398 // mask operand, which expresses which register is to be used, e.g. r8, and in
3399 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string was invalid.
3401 static inline int getBankedRegisterMask(StringRef RegString) {
3402 return StringSwitch<int>(RegString.lower())
3403 .Case("r8_usr", 0x00)
3404 .Case("r9_usr", 0x01)
3405 .Case("r10_usr", 0x02)
3406 .Case("r11_usr", 0x03)
3407 .Case("r12_usr", 0x04)
3408 .Case("sp_usr", 0x05)
3409 .Case("lr_usr", 0x06)
3410 .Case("r8_fiq", 0x08)
3411 .Case("r9_fiq", 0x09)
3412 .Case("r10_fiq", 0x0a)
3413 .Case("r11_fiq", 0x0b)
3414 .Case("r12_fiq", 0x0c)
3415 .Case("sp_fiq", 0x0d)
3416 .Case("lr_fiq", 0x0e)
3417 .Case("lr_irq", 0x10)
3418 .Case("sp_irq", 0x11)
3419 .Case("lr_svc", 0x12)
3420 .Case("sp_svc", 0x13)
3421 .Case("lr_abt", 0x14)
3422 .Case("sp_abt", 0x15)
3423 .Case("lr_und", 0x16)
3424 .Case("sp_und", 0x17)
3425 .Case("lr_mon", 0x1c)
3426 .Case("sp_mon", 0x1d)
3427 .Case("elr_hyp", 0x1e)
3428 .Case("sp_hyp", 0x1f)
3429 .Case("spsr_fiq", 0x2e)
3430 .Case("spsr_irq", 0x30)
3431 .Case("spsr_svc", 0x32)
3432 .Case("spsr_abt", 0x34)
3433 .Case("spsr_und", 0x36)
3434 .Case("spsr_mon", 0x3c)
3435 .Case("spsr_hyp", 0x3e)
3439 // Maps a MClass special register string to its value for use in the
3440 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3441 // Returns -1 to signify that the string was invalid.
3442 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3443 return StringSwitch<int>(RegString.lower())
3453 .Case("primask", 0x10)
3454 .Case("basepri", 0x11)
3455 .Case("basepri_max", 0x12)
3456 .Case("faultmask", 0x13)
3457 .Case("control", 0x14)
3461 // The flags here are common to those allowed for apsr in the A class cores and
3462 // those allowed for the special registers in the M class cores. Returns a
3463 // value representing which flags were present, -1 if invalid.
3464 static inline int getMClassFlagsMask(StringRef Flags) {
3468 return StringSwitch<int>(Flags)
3471 .Case("nzcvqg", 0x3)
3475 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
3476 const ARMSubtarget *Subtarget) {
3477 // Ensure that the register (without flags) was a valid M Class special
3479 int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
3480 if (SYSmvalue == -1)
3483 // basepri, basepri_max and faultmask are only valid for V7m.
3484 if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
3487 // If it was a read then we won't be expecting flags and so at this point
3488 // we can return the mask.
3490 assert (Flags.empty() && "Unexpected flags for reading M class register.");
3494 // We know we are now handling a write so need to get the mask for the flags.
3495 int Mask = getMClassFlagsMask(Flags);
3497 // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
3498 // shouldn't have flags present.
3499 if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
3502 // The _g and _nzcvqg versions are only valid if the DSP extension is available.
3504 if (!Subtarget->hasThumb2DSP() && (Mask & 0x2))
3507 // The register was valid, so we need to put the mask in the correct place
3508 // (the flags need to be in bits 11-10) and combine it with the SYSm value to
3509 // construct the operand for the instruction node.
3510 if (SYSmvalue < 0x4)
3511 return SYSmvalue | Mask << 10;
3516 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3517 // The mask operand contains the special register (R bit) in bit 4: the R bit
3518 // is 1 if the register is spsr, or 0 for one of cpsr/apsr. Bits 3-0 contain
3519 // the fields to be accessed in the special register, set by the flags
3520 // provided with the register.
3522 if (Reg == "apsr") {
3523 // The flags permitted for apsr are the same flags that are allowed in
3524 // M class registers. We get the flag value and then shift the flags into
3525 // the correct place to combine with the mask.
3526 Mask = getMClassFlagsMask(Flags);
3532 if (Reg != "cpsr" && Reg != "spsr") {
3536 // This is the same as if the flags were "fc"
3537 if (Flags.empty() || Flags == "all")
3540 // Inspect the supplied flags string and set the bits in the mask for
3541 // the relevant and valid flags allowed for cpsr and spsr.
3542 for (char Flag : Flags) {
3561 // This avoids allowing strings where the same flag bit appears twice.
3562 if (!FlagVal || (Mask & FlagVal))
3567 // If the register is spsr then we need to set the R bit.
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N) {
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MRC node (32 bit) or an
    // MRRC node (64 bit); we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops);
  }
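  // Illustrative example (assumed ACLE-style register string, not from the
  // original source): IR such as
  //   %v = call i32 @llvm.read_register.i32(metadata !0)
  //   !0 = !{!"cp15:0:c13:c0:3"}
  // produces five integer operands above and is lowered to a (t2)MRC machine
  // node, while a three-field string selects the 64-bit (t2)MRRC form.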
  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops);
  }
  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2())
      return nullptr;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
      return nullptr;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops);
  }
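  // Illustrative example (not from the original source): for
  //   %fpscr = call i32 @llvm.read_register.i32(metadata !0)
  //   !0 = !{!"fpscr"}
  // the switch above selects ARM::VMRS, so the read becomes a single VMRS
  // machine node, provided the subtarget has VFP2.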
  // If the target is M Class then we need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget);
    if (SYSmValue == -1)
      return nullptr;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops);
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL,
                                  MVT::i32, MVT::Other, Ops);
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
                                  DL, MVT::i32, MVT::Other, Ops);
  }

  return nullptr;
}
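// Illustrative note (not from the original source): on an A or R class core,
// reading "cpsr" or "apsr" therefore becomes an MRS / t2MRS_AR node, while
// "spsr" uses the MRSsys / t2MRSsys_AR form selected just above.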
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N) {
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MCR node (32 bit) or an
    // MCRR node (64 bit); we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
  }
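  // Illustrative example (assumed ACLE-style register string, not from the
  // original source): a 64-bit write such as
  //   call void @llvm.write_register.i64(metadata !0, i64 %val)
  //   !0 = !{!"cp15:0:c2"}
  // supplies the value as two i32 operands (operands 2 and 3 of the node),
  // which are inserted above and lowered to a (t2)MCRR machine node.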
  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops);
  }
  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2())
      return nullptr;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
  }
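  // Illustrative example (not from the original source): writing "fpscr" via
  //   call void @llvm.write_register.i32(metadata !0, i32 %v)
  // selects ARM::VMSR above, so the write becomes a single VMSR machine node
  // when VFP2 is available.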
  SmallVector<StringRef, 5> Fields;
  StringRef(SpecialReg).split(Fields, "_", 1, false);
  std::string Reg = Fields[0].str();
  StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
  // If the target is M Class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    // basepri_max gets split, so we need to correct Reg and Flags.
    if (SpecialReg == "basepri_max") {
      Reg = SpecialReg;
      Flags = "";
    }

    int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
    if (SYSmValue == -1)
      return nullptr;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops);
  }
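  // Illustrative note (based on the code above): "basepri_max" is split at the
  // underscore into Reg == "basepri" and Flags == "max", so it is patched back
  // to the full name here before its SYSm value (0x12) is looked up and a
  // t2MSR_M node is built.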
  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                  DL, MVT::Other, Ops);
  }

  return nullptr;
}
SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N) {
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use the GPRPair reg class for "%r" for 64-bit data. For
  // Thumb, the 64-bit data may be referred to by the H, Q, R modifiers, so we
  // still pack them into a GPRPair.

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr, 0);
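  // Illustrative example (not from the original source): inline asm such as
  //   asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(Val64) : "r"(Ptr));
  // needs its 64-bit "=&r" operand in an even/odd register pair, which is why
  // the loop below rewrites matching two-GPR operands into a single GPRPair.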
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
       ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    } else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);
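    // Illustrative example (not from the original source): an "i" operand,
    // e.g. asm("orr %0, %1, %2" : "=r"(x) : "r"(y), "i"(4)), shows up here as
    // a Kind_Imm flag word followed by a constant, which is why that pair is
    // copied through unchanged by the Kind_Imm handling just above.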
    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();
    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N, 0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    } else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;
    if (PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum */);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return nullptr;

  SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  return New.getNode();
}
bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_i:
    // FIXME: It seems strange that 'i' is needed here since it's supposed to
    //        be an immediate and not a memory constraint.
    // Fallthrough.
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}
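// Illustrative note (not from the original source): a constrained memory
// operand such as asm("ldrex %0, %1" : "=r"(v) : "Q"(*p)) reaches the function
// above with ConstraintID == InlineAsm::Constraint_Q, and the address is
// simply passed through in a register.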
/// createARMISelDag - This pass converts a legalized DAG into a
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}