1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the ARM target.
12 //===----------------------------------------------------------------------===//
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMTargetMachine.h"
17 #include "MCTargetDesc/ARMAddressingModes.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/IR/CallingConv.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Compiler.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Target/TargetLowering.h"
36 #include "llvm/Target/TargetOptions.h"
40 #define DEBUG_TYPE "arm-isel"
43 DisableShifterOp("disable-shifter-op", cl::Hidden,
44 cl::desc("Disable isel of shifter-op"),
48 CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
49 cl::desc("Check fp vmla / vmls hazard at isel time"),
52 //===--------------------------------------------------------------------===//
53 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
54 /// instructions for SelectionDAG operations.
59 AM2_BASE, // Simple AM2 (+-imm12)
60 AM2_SHOP // Shifter-op AM2
63 class ARMDAGToDAGISel : public SelectionDAGISel {
64 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
65 /// make the right decision when generating code for different targets.
66 const ARMSubtarget *Subtarget;
69 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
70 : SelectionDAGISel(tm, OptLevel) {}
72 bool runOnMachineFunction(MachineFunction &MF) override {
73 // Reset the subtarget each time through.
74 Subtarget = &MF.getSubtarget<ARMSubtarget>();
75 SelectionDAGISel::runOnMachineFunction(MF);
79 const char *getPassName() const override {
80 return "ARM Instruction Selection";
83 void PreprocessISelDAG() override;
85 /// getI32Imm - Return a target constant of type i32 with the specified
87 inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
88 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
91 SDNode *Select(SDNode *N) override;
94 bool hasNoVMLxHazardUse(SDNode *N) const;
95 bool isShifterOpProfitable(const SDValue &Shift,
96 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
97 bool SelectRegShifterOperand(SDValue N, SDValue &A,
98 SDValue &B, SDValue &C,
99 bool CheckProfitability = true);
100 bool SelectImmShifterOperand(SDValue N, SDValue &A,
101 SDValue &B, bool CheckProfitability = true);
102 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
103 SDValue &B, SDValue &C) {
104 // Don't apply the profitability check
105 return SelectRegShifterOperand(N, A, B, C, false);
107 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
109 // Don't apply the profitability check
110 return SelectImmShifterOperand(N, A, B, false);
113 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
114 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
116 AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
117 SDValue &Offset, SDValue &Opc);
118 bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
120 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
123 bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
125 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
128 bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
130 SelectAddrMode2Worker(N, Base, Offset, Opc);
131 // return SelectAddrMode2ShOp(N, Base, Offset, Opc);
132 // This always matches one way or another.
136 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
137 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
138 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
139 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
143 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
144 SDValue &Offset, SDValue &Opc);
145 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
146 SDValue &Offset, SDValue &Opc);
147 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
148 SDValue &Offset, SDValue &Opc);
149 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
150 bool SelectAddrMode3(SDValue N, SDValue &Base,
151 SDValue &Offset, SDValue &Opc);
152 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
153 SDValue &Offset, SDValue &Opc);
154 bool SelectAddrMode5(SDValue N, SDValue &Base,
156 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
157 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
159 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
161 // Thumb Addressing Modes:
162 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
163 bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
165 bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
166 bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
167 bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
168 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
170 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
172 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
174 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
176 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
178 // Thumb 2 Addressing Modes:
179 bool SelectT2ShifterOperandReg(SDValue N,
180 SDValue &BaseReg, SDValue &Opc);
181 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
182 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
184 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
186 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
187 SDValue &OffReg, SDValue &ShImm);
188 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
190 inline bool is_so_imm(unsigned Imm) const {
191 return ARM_AM::getSOImmVal(Imm) != -1;
194 inline bool is_so_imm_not(unsigned Imm) const {
195 return ARM_AM::getSOImmVal(~Imm) != -1;
198 inline bool is_t2_so_imm(unsigned Imm) const {
199 return ARM_AM::getT2SOImmVal(Imm) != -1;
202 inline bool is_t2_so_imm_not(unsigned Imm) const {
203 return ARM_AM::getT2SOImmVal(~Imm) != -1;
206 // Include the pieces autogenerated from the target description.
207 #include "ARMGenDAGISel.inc"
210 /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
212 SDNode *SelectARMIndexedLoad(SDNode *N);
213 SDNode *SelectT2IndexedLoad(SDNode *N);
215 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
216 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
217 /// loads of D registers and even subregs and odd subregs of Q registers.
218 /// For NumVecs <= 2, QOpcodes1 is not used.
219 SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
220 const uint16_t *DOpcodes,
221 const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
223 /// SelectVST - Select NEON store intrinsics. NumVecs should
224 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
225 /// stores of D registers and even subregs and odd subregs of Q registers.
226 /// For NumVecs <= 2, QOpcodes1 is not used.
227 SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
228 const uint16_t *DOpcodes,
229 const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
231 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
232 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
233 /// load/store of D registers and Q registers.
234 SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
235 bool isUpdating, unsigned NumVecs,
236 const uint16_t *DOpcodes, const uint16_t *QOpcodes);
238 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
239 /// should be 2, 3 or 4. The opcode array specifies the instructions used
240 /// for loading D registers. (Q registers are not supported.)
241 SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
242 const uint16_t *Opcodes);
244 /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
245 /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
246 /// generated to force the table registers to be consecutive.
247 SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
249 /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
250 SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
252 // Select special operations if node forms integer ABS pattern
253 SDNode *SelectABSOp(SDNode *N);
255 SDNode *SelectReadRegister(SDNode *N);
256 SDNode *SelectWriteRegister(SDNode *N);
258 SDNode *SelectInlineAsm(SDNode *N);
260 SDNode *SelectConcatVector(SDNode *N);
262 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
263 /// inline asm expressions.
264 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
265 std::vector<SDValue> &OutOps) override;
267 // Form pairs of consecutive R, S, D, or Q registers.
268 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
269 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
270 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
271 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
273 // Form sequences of 4 consecutive S, D, or Q registers.
274 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
275 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
276 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
278 // Get the alignment operand for a NEON VLD or VST instruction.
279 SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
284 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
285 /// operand. If so Imm will receive the 32-bit value.
286 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
287 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
288 Imm = cast<ConstantSDNode>(N)->getZExtValue();
294 // isInt32Immediate - This method tests to see if a constant operand.
295 // If so Imm will receive the 32 bit value.
296 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
297 return isInt32Immediate(N.getNode(), Imm);
300 // isOpcWithIntImmediate - This method tests to see if the node is a specific
301 // opcode and that it has a immediate integer right operand.
302 // If so Imm will receive the 32 bit value.
303 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
304 return N->getOpcode() == Opc &&
305 isInt32Immediate(N->getOperand(1).getNode(), Imm);
308 /// \brief Check whether a particular node is a constant value representable as
309 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
311 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
312 static bool isScaledConstantInRange(SDValue Node, int Scale,
313 int RangeMin, int RangeMax,
314 int &ScaledConstant) {
315 assert(Scale > 0 && "Invalid scale!");
317 // Check that this is a constant.
318 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
322 ScaledConstant = (int) C->getZExtValue();
323 if ((ScaledConstant % Scale) != 0)
326 ScaledConstant /= Scale;
327 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
330 void ARMDAGToDAGISel::PreprocessISelDAG() {
331 if (!Subtarget->hasV6T2Ops())
334 bool isThumb2 = Subtarget->isThumb();
335 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
336 E = CurDAG->allnodes_end(); I != E; ) {
337 SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
339 if (N->getOpcode() != ISD::ADD)
342 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
343 // leading zeros, followed by consecutive set bits, followed by 1 or 2
344 // trailing zeros, e.g. 1020.
345 // Transform the expression to
346 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
347 // of trailing zeros of c2. The left shift would be folded as an shifter
348 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
351 SDValue N0 = N->getOperand(0);
352 SDValue N1 = N->getOperand(1);
353 unsigned And_imm = 0;
354 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
355 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
361 // Check if the AND mask is an immediate of the form: 000.....1111111100
362 unsigned TZ = countTrailingZeros(And_imm);
363 if (TZ != 1 && TZ != 2)
364 // Be conservative here. Shifter operands aren't always free. e.g. On
365 // Swift, left shifter operand of 1 / 2 for free but others are not.
367 // ubfx r3, r1, #16, #8
368 // ldr.w r3, [r0, r3, lsl #2]
371 // and.w r2, r9, r1, lsr #14
375 if (And_imm & (And_imm + 1))
378 // Look for (and (srl X, c1), c2).
379 SDValue Srl = N1.getOperand(0);
380 unsigned Srl_imm = 0;
381 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
385 // Make sure first operand is not a shifter operand which would prevent
386 // folding of the left shift.
391 if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
394 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
395 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
399 // Now make the transformation.
400 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
402 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
404 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
406 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
407 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
408 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
409 CurDAG->UpdateNodeOperands(N, N0, N1);
413 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
414 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
415 /// least on current ARM implementations) which should be avoidded.
416 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
417 if (OptLevel == CodeGenOpt::None)
420 if (!CheckVMLxHazard)
423 if (!Subtarget->isCortexA7() && !Subtarget->isCortexA8() &&
424 !Subtarget->isCortexA9() && !Subtarget->isSwift())
430 SDNode *Use = *N->use_begin();
431 if (Use->getOpcode() == ISD::CopyToReg)
433 if (Use->isMachineOpcode()) {
434 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
435 CurDAG->getSubtarget().getInstrInfo());
437 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
440 unsigned Opcode = MCID.getOpcode();
441 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
443 // vmlx feeding into another vmlx. We actually want to unfold
444 // the use later in the MLxExpansion pass. e.g.
446 // vmla (stall 8 cycles)
451 // This adds up to about 18 - 19 cycles.
454 // vmul (stall 4 cycles)
455 // vadd adds up to about 14 cycles.
456 return TII->isFpMLxInstruction(Opcode);
462 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
463 ARM_AM::ShiftOpc ShOpcVal,
465 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
467 if (Shift.hasOneUse())
470 return ShOpcVal == ARM_AM::lsl &&
471 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
474 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
477 bool CheckProfitability) {
478 if (DisableShifterOp)
481 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
483 // Don't match base register only case. That is matched to a separate
484 // lower complexity pattern with explicit register operand.
485 if (ShOpcVal == ARM_AM::no_shift) return false;
487 BaseReg = N.getOperand(0);
488 unsigned ShImmVal = 0;
489 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
490 if (!RHS) return false;
491 ShImmVal = RHS->getZExtValue() & 31;
492 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
497 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
501 bool CheckProfitability) {
502 if (DisableShifterOp)
505 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
507 // Don't match base register only case. That is matched to a separate
508 // lower complexity pattern with explicit register operand.
509 if (ShOpcVal == ARM_AM::no_shift) return false;
511 BaseReg = N.getOperand(0);
512 unsigned ShImmVal = 0;
513 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
514 if (RHS) return false;
516 ShReg = N.getOperand(1);
517 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
519 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
525 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
528 // Match simple R + imm12 operands.
531 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
532 !CurDAG->isBaseWithConstantOffset(N)) {
533 if (N.getOpcode() == ISD::FrameIndex) {
534 // Match frame index.
535 int FI = cast<FrameIndexSDNode>(N)->getIndex();
536 Base = CurDAG->getTargetFrameIndex(
537 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
538 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
542 if (N.getOpcode() == ARMISD::Wrapper &&
543 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
544 Base = N.getOperand(0);
547 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
551 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
552 int RHSC = (int)RHS->getSExtValue();
553 if (N.getOpcode() == ISD::SUB)
556 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
557 Base = N.getOperand(0);
558 if (Base.getOpcode() == ISD::FrameIndex) {
559 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
560 Base = CurDAG->getTargetFrameIndex(
561 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
563 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
570 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
576 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
578 if (N.getOpcode() == ISD::MUL &&
579 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
580 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
581 // X * [3,5,9] -> X + X * [2,4,8] etc.
582 int RHSC = (int)RHS->getZExtValue();
585 ARM_AM::AddrOpc AddSub = ARM_AM::add;
587 AddSub = ARM_AM::sub;
590 if (isPowerOf2_32(RHSC)) {
591 unsigned ShAmt = Log2_32(RHSC);
592 Base = Offset = N.getOperand(0);
593 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
602 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
603 // ISD::OR that is equivalent to an ISD::ADD.
604 !CurDAG->isBaseWithConstantOffset(N))
607 // Leave simple R +/- imm12 operands for LDRi12
608 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
610 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
611 -0x1000+1, 0x1000, RHSC)) // 12 bits.
615 // Otherwise this is R +/- [possibly shifted] R.
616 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
617 ARM_AM::ShiftOpc ShOpcVal =
618 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
621 Base = N.getOperand(0);
622 Offset = N.getOperand(1);
624 if (ShOpcVal != ARM_AM::no_shift) {
625 // Check to see if the RHS of the shift is a constant, if not, we can't fold
627 if (ConstantSDNode *Sh =
628 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
629 ShAmt = Sh->getZExtValue();
630 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
631 Offset = N.getOperand(1).getOperand(0);
634 ShOpcVal = ARM_AM::no_shift;
637 ShOpcVal = ARM_AM::no_shift;
641 // Try matching (R shl C) + (R).
642 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
643 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
644 N.getOperand(0).hasOneUse())) {
645 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
646 if (ShOpcVal != ARM_AM::no_shift) {
647 // Check to see if the RHS of the shift is a constant, if not, we can't
649 if (ConstantSDNode *Sh =
650 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
651 ShAmt = Sh->getZExtValue();
652 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
653 Offset = N.getOperand(0).getOperand(0);
654 Base = N.getOperand(1);
657 ShOpcVal = ARM_AM::no_shift;
660 ShOpcVal = ARM_AM::no_shift;
665 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
673 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
677 if (N.getOpcode() == ISD::MUL &&
678 (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
679 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
680 // X * [3,5,9] -> X + X * [2,4,8] etc.
681 int RHSC = (int)RHS->getZExtValue();
684 ARM_AM::AddrOpc AddSub = ARM_AM::add;
686 AddSub = ARM_AM::sub;
689 if (isPowerOf2_32(RHSC)) {
690 unsigned ShAmt = Log2_32(RHSC);
691 Base = Offset = N.getOperand(0);
692 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
701 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
702 // ISD::OR that is equivalent to an ADD.
703 !CurDAG->isBaseWithConstantOffset(N)) {
705 if (N.getOpcode() == ISD::FrameIndex) {
706 int FI = cast<FrameIndexSDNode>(N)->getIndex();
707 Base = CurDAG->getTargetFrameIndex(
708 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
709 } else if (N.getOpcode() == ARMISD::Wrapper &&
710 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
711 Base = N.getOperand(0);
713 Offset = CurDAG->getRegister(0, MVT::i32);
714 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
720 // Match simple R +/- imm12 operands.
721 if (N.getOpcode() != ISD::SUB) {
723 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
724 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
725 Base = N.getOperand(0);
726 if (Base.getOpcode() == ISD::FrameIndex) {
727 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
728 Base = CurDAG->getTargetFrameIndex(
729 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
731 Offset = CurDAG->getRegister(0, MVT::i32);
733 ARM_AM::AddrOpc AddSub = ARM_AM::add;
735 AddSub = ARM_AM::sub;
738 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
745 if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
746 // Compute R +/- (R << N) and reuse it.
748 Offset = CurDAG->getRegister(0, MVT::i32);
749 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
755 // Otherwise this is R +/- [possibly shifted] R.
756 ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
757 ARM_AM::ShiftOpc ShOpcVal =
758 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
761 Base = N.getOperand(0);
762 Offset = N.getOperand(1);
764 if (ShOpcVal != ARM_AM::no_shift) {
765 // Check to see if the RHS of the shift is a constant, if not, we can't fold
767 if (ConstantSDNode *Sh =
768 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
769 ShAmt = Sh->getZExtValue();
770 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
771 Offset = N.getOperand(1).getOperand(0);
774 ShOpcVal = ARM_AM::no_shift;
777 ShOpcVal = ARM_AM::no_shift;
781 // Try matching (R shl C) + (R).
782 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
783 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
784 N.getOperand(0).hasOneUse())) {
785 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
786 if (ShOpcVal != ARM_AM::no_shift) {
787 // Check to see if the RHS of the shift is a constant, if not, we can't
789 if (ConstantSDNode *Sh =
790 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
791 ShAmt = Sh->getZExtValue();
792 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
793 Offset = N.getOperand(0).getOperand(0);
794 Base = N.getOperand(1);
797 ShOpcVal = ARM_AM::no_shift;
800 ShOpcVal = ARM_AM::no_shift;
805 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
810 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
811 SDValue &Offset, SDValue &Opc) {
812 unsigned Opcode = Op->getOpcode();
813 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
814 ? cast<LoadSDNode>(Op)->getAddressingMode()
815 : cast<StoreSDNode>(Op)->getAddressingMode();
816 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
817 ? ARM_AM::add : ARM_AM::sub;
819 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
823 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
825 if (ShOpcVal != ARM_AM::no_shift) {
826 // Check to see if the RHS of the shift is a constant, if not, we can't fold
828 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
829 ShAmt = Sh->getZExtValue();
830 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
831 Offset = N.getOperand(0);
834 ShOpcVal = ARM_AM::no_shift;
837 ShOpcVal = ARM_AM::no_shift;
841 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
846 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
847 SDValue &Offset, SDValue &Opc) {
848 unsigned Opcode = Op->getOpcode();
849 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
850 ? cast<LoadSDNode>(Op)->getAddressingMode()
851 : cast<StoreSDNode>(Op)->getAddressingMode();
852 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
853 ? ARM_AM::add : ARM_AM::sub;
855 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
856 if (AddSub == ARM_AM::sub) Val *= -1;
857 Offset = CurDAG->getRegister(0, MVT::i32);
858 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
866 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
867 SDValue &Offset, SDValue &Opc) {
868 unsigned Opcode = Op->getOpcode();
869 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
870 ? cast<LoadSDNode>(Op)->getAddressingMode()
871 : cast<StoreSDNode>(Op)->getAddressingMode();
872 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
873 ? ARM_AM::add : ARM_AM::sub;
875 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
876 Offset = CurDAG->getRegister(0, MVT::i32);
877 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
879 SDLoc(Op), MVT::i32);
886 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
891 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
892 SDValue &Base, SDValue &Offset,
894 if (N.getOpcode() == ISD::SUB) {
895 // X - C is canonicalize to X + -C, no need to handle it here.
896 Base = N.getOperand(0);
897 Offset = N.getOperand(1);
898 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
903 if (!CurDAG->isBaseWithConstantOffset(N)) {
905 if (N.getOpcode() == ISD::FrameIndex) {
906 int FI = cast<FrameIndexSDNode>(N)->getIndex();
907 Base = CurDAG->getTargetFrameIndex(
908 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
910 Offset = CurDAG->getRegister(0, MVT::i32);
911 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
916 // If the RHS is +/- imm8, fold into addr mode.
918 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
919 -256 + 1, 256, RHSC)) { // 8 bits.
920 Base = N.getOperand(0);
921 if (Base.getOpcode() == ISD::FrameIndex) {
922 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
923 Base = CurDAG->getTargetFrameIndex(
924 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
926 Offset = CurDAG->getRegister(0, MVT::i32);
928 ARM_AM::AddrOpc AddSub = ARM_AM::add;
930 AddSub = ARM_AM::sub;
933 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
938 Base = N.getOperand(0);
939 Offset = N.getOperand(1);
940 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
945 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
946 SDValue &Offset, SDValue &Opc) {
947 unsigned Opcode = Op->getOpcode();
948 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
949 ? cast<LoadSDNode>(Op)->getAddressingMode()
950 : cast<StoreSDNode>(Op)->getAddressingMode();
951 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
952 ? ARM_AM::add : ARM_AM::sub;
954 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
955 Offset = CurDAG->getRegister(0, MVT::i32);
956 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
962 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
967 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
968 SDValue &Base, SDValue &Offset) {
969 if (!CurDAG->isBaseWithConstantOffset(N)) {
971 if (N.getOpcode() == ISD::FrameIndex) {
972 int FI = cast<FrameIndexSDNode>(N)->getIndex();
973 Base = CurDAG->getTargetFrameIndex(
974 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
975 } else if (N.getOpcode() == ARMISD::Wrapper &&
976 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
977 Base = N.getOperand(0);
979 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
984 // If the RHS is +/- imm8, fold into addr mode.
986 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
987 -256 + 1, 256, RHSC)) {
988 Base = N.getOperand(0);
989 if (Base.getOpcode() == ISD::FrameIndex) {
990 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
991 Base = CurDAG->getTargetFrameIndex(
992 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
995 ARM_AM::AddrOpc AddSub = ARM_AM::add;
997 AddSub = ARM_AM::sub;
1000 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1001 SDLoc(N), MVT::i32);
1006 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1007 SDLoc(N), MVT::i32);
1011 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1015 unsigned Alignment = 0;
1017 MemSDNode *MemN = cast<MemSDNode>(Parent);
1019 if (isa<LSBaseSDNode>(MemN) ||
1020 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1021 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1022 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1023 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1024 // The maximum alignment is equal to the memory size being referenced.
1025 unsigned MMOAlign = MemN->getAlignment();
1026 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1027 if (MMOAlign >= MemSize && MemSize > 1)
1028 Alignment = MemSize;
1030 // All other uses of addrmode6 are for intrinsics. For now just record
1031 // the raw alignment value; it will be refined later based on the legal
1032 // alignment operands for the intrinsic.
1033 Alignment = MemN->getAlignment();
1036 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1040 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1042 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1043 ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1044 if (AM != ISD::POST_INC)
1047 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1048 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1049 Offset = CurDAG->getRegister(0, MVT::i32);
1054 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1055 SDValue &Offset, SDValue &Label) {
1056 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1057 Offset = N.getOperand(0);
1058 SDValue N1 = N.getOperand(1);
1059 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1060 SDLoc(N), MVT::i32);
1068 //===----------------------------------------------------------------------===//
1069 // Thumb Addressing Modes
1070 //===----------------------------------------------------------------------===//
1072 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1073 SDValue &Base, SDValue &Offset){
1074 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1075 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1076 if (!NC || !NC->isNullValue())
1083 Base = N.getOperand(0);
1084 Offset = N.getOperand(1);
1089 ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
1090 SDValue &Offset, unsigned Scale) {
1092 SDValue TmpBase, TmpOffImm;
1093 if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
1094 return false; // We want to select tLDRspi / tSTRspi instead.
1096 if (N.getOpcode() == ARMISD::Wrapper &&
1097 N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
1098 return false; // We want to select tLDRpci instead.
1101 if (!CurDAG->isBaseWithConstantOffset(N))
1104 // Thumb does not have [sp, r] address mode.
1105 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1106 RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
1107 if ((LHSR && LHSR->getReg() == ARM::SP) ||
1108 (RHSR && RHSR->getReg() == ARM::SP))
1111 // FIXME: Why do we explicitly check for a match here and then return false?
1112 // Presumably to allow something else to match, but shouldn't this be
1115 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
1118 Base = N.getOperand(0);
1119 Offset = N.getOperand(1);
1124 ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
1127 return SelectThumbAddrModeRI(N, Base, Offset, 1);
1131 ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
1134 return SelectThumbAddrModeRI(N, Base, Offset, 2);
1138 ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
1141 return SelectThumbAddrModeRI(N, Base, Offset, 4);
1145 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1146 SDValue &Base, SDValue &OffImm) {
1148 SDValue TmpBase, TmpOffImm;
1149 if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
1150 return false; // We want to select tLDRspi / tSTRspi instead.
1152 if (N.getOpcode() == ARMISD::Wrapper &&
1153 N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
1154 return false; // We want to select tLDRpci instead.
1157 if (!CurDAG->isBaseWithConstantOffset(N)) {
1158 if (N.getOpcode() == ARMISD::Wrapper &&
1159 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
1160 Base = N.getOperand(0);
1165 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1169 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1170 RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
1171 if ((LHSR && LHSR->getReg() == ARM::SP) ||
1172 (RHSR && RHSR->getReg() == ARM::SP)) {
1173 ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
1174 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1175 unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
1176 unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
1178 // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
1179 if (LHSC != 0 || RHSC != 0) return false;
1182 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1186 // If the RHS is + imm5 * scale, fold into addr mode.
1188 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1189 Base = N.getOperand(0);
1190 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1194 Base = N.getOperand(0);
1195 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1200 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1202 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1206 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1208 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1212 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1214 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1217 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1218 SDValue &Base, SDValue &OffImm) {
1219 if (N.getOpcode() == ISD::FrameIndex) {
1220 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1221 // Only multiples of 4 are allowed for the offset, so the frame object
1222 // alignment must be at least 4.
1223 MachineFrameInfo *MFI = MF->getFrameInfo();
1224 if (MFI->getObjectAlignment(FI) < 4)
1225 MFI->setObjectAlignment(FI, 4);
1226 Base = CurDAG->getTargetFrameIndex(
1227 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1228 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1232 if (!CurDAG->isBaseWithConstantOffset(N))
1235 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1236 if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1237 (LHSR && LHSR->getReg() == ARM::SP)) {
1238 // If the RHS is + imm8 * scale, fold into addr mode.
1240 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1241 Base = N.getOperand(0);
1242 if (Base.getOpcode() == ISD::FrameIndex) {
1243 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1244 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1245 // indexed by the LHS must be 4-byte aligned.
1246 MachineFrameInfo *MFI = MF->getFrameInfo();
1247 if (MFI->getObjectAlignment(FI) < 4)
1248 MFI->setObjectAlignment(FI, 4);
1249 Base = CurDAG->getTargetFrameIndex(
1250 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1252 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1261 //===----------------------------------------------------------------------===//
1262 // Thumb 2 Addressing Modes
1263 //===----------------------------------------------------------------------===//
1266 bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
1268 if (DisableShifterOp)
1271 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
1273 // Don't match base register only case. That is matched to a separate
1274 // lower complexity pattern with explicit register operand.
1275 if (ShOpcVal == ARM_AM::no_shift) return false;
1277 BaseReg = N.getOperand(0);
1278 unsigned ShImmVal = 0;
1279 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1280 ShImmVal = RHS->getZExtValue() & 31;
1281 Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), SDLoc(N));
1288 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1289 SDValue &Base, SDValue &OffImm) {
1290 // Match simple R + imm12 operands.
1293 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1294 !CurDAG->isBaseWithConstantOffset(N)) {
1295 if (N.getOpcode() == ISD::FrameIndex) {
1296 // Match frame index.
1297 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1298 Base = CurDAG->getTargetFrameIndex(
1299 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1300 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1304 if (N.getOpcode() == ARMISD::Wrapper &&
1305 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
1306 Base = N.getOperand(0);
1307 if (Base.getOpcode() == ISD::TargetConstantPool)
1308 return false; // We want to select t2LDRpci instead.
1311 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1315 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1316 if (SelectT2AddrModeImm8(N, Base, OffImm))
1317 // Let t2LDRi8 handle (R - imm8).
1320 int RHSC = (int)RHS->getZExtValue();
1321 if (N.getOpcode() == ISD::SUB)
1324 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1325 Base = N.getOperand(0);
1326 if (Base.getOpcode() == ISD::FrameIndex) {
1327 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1328 Base = CurDAG->getTargetFrameIndex(
1329 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1331 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1338 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1342 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1343 SDValue &Base, SDValue &OffImm) {
1344 // Match simple R - imm8 operands.
1345 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1346 !CurDAG->isBaseWithConstantOffset(N))
1349 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1350 int RHSC = (int)RHS->getSExtValue();
1351 if (N.getOpcode() == ISD::SUB)
1354 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1355 Base = N.getOperand(0);
1356 if (Base.getOpcode() == ISD::FrameIndex) {
1357 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1358 Base = CurDAG->getTargetFrameIndex(
1359 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1361 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1369 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1371 unsigned Opcode = Op->getOpcode();
1372 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1373 ? cast<LoadSDNode>(Op)->getAddressingMode()
1374 : cast<StoreSDNode>(Op)->getAddressingMode();
1376 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1377 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1378 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1379 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1386 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1388 SDValue &OffReg, SDValue &ShImm) {
1389 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1390 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1393 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1394 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1395 int RHSC = (int)RHS->getZExtValue();
1396 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1398 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1402 // Look for (R + R) or (R + (R << [1,2,3])).
1404 Base = N.getOperand(0);
1405 OffReg = N.getOperand(1);
1407 // Swap if it is ((R << c) + R).
1408 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1409 if (ShOpcVal != ARM_AM::lsl) {
1410 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1411 if (ShOpcVal == ARM_AM::lsl)
1412 std::swap(Base, OffReg);
1415 if (ShOpcVal == ARM_AM::lsl) {
1416 // Check to see if the RHS of the shift is a constant, if not, we can't fold
1418 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1419 ShAmt = Sh->getZExtValue();
1420 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1421 OffReg = OffReg.getOperand(0);
1428 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1433 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1435 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1438 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1440 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1443 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1447 uint32_t RHSC = (int)RHS->getZExtValue();
1448 if (RHSC > 1020 || RHSC % 4 != 0)
1451 Base = N.getOperand(0);
1452 if (Base.getOpcode() == ISD::FrameIndex) {
1453 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1454 Base = CurDAG->getTargetFrameIndex(
1455 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1458 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1462 //===--------------------------------------------------------------------===//
1464 /// getAL - Returns a ARMCC::AL immediate node.
1465 static inline SDValue getAL(SelectionDAG *CurDAG, SDLoc dl) {
1466 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1469 SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
1470 LoadSDNode *LD = cast<LoadSDNode>(N);
1471 ISD::MemIndexedMode AM = LD->getAddressingMode();
1472 if (AM == ISD::UNINDEXED)
1475 EVT LoadedVT = LD->getMemoryVT();
1476 SDValue Offset, AMOpc;
1477 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1478 unsigned Opcode = 0;
1480 if (LoadedVT == MVT::i32 && isPre &&
1481 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1482 Opcode = ARM::LDR_PRE_IMM;
1484 } else if (LoadedVT == MVT::i32 && !isPre &&
1485 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1486 Opcode = ARM::LDR_POST_IMM;
1488 } else if (LoadedVT == MVT::i32 &&
1489 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1490 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1493 } else if (LoadedVT == MVT::i16 &&
1494 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1496 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1497 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1498 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1499 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1500 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1501 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1503 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1507 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1509 Opcode = ARM::LDRB_PRE_IMM;
1510 } else if (!isPre &&
1511 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1513 Opcode = ARM::LDRB_POST_IMM;
1514 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1516 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1522 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1523 SDValue Chain = LD->getChain();
1524 SDValue Base = LD->getBasePtr();
1525 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1526 CurDAG->getRegister(0, MVT::i32), Chain };
1527 return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1528 MVT::i32, MVT::Other, Ops);
1530 SDValue Chain = LD->getChain();
1531 SDValue Base = LD->getBasePtr();
1532 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1533 CurDAG->getRegister(0, MVT::i32), Chain };
1534 return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1535 MVT::i32, MVT::Other, Ops);
1542 SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
1543 LoadSDNode *LD = cast<LoadSDNode>(N);
1544 ISD::MemIndexedMode AM = LD->getAddressingMode();
1545 if (AM == ISD::UNINDEXED)
1548 EVT LoadedVT = LD->getMemoryVT();
1549 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1551 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1552 unsigned Opcode = 0;
1554 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1555 switch (LoadedVT.getSimpleVT().SimpleTy) {
1557 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1561 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1563 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1568 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1570 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1579 SDValue Chain = LD->getChain();
1580 SDValue Base = LD->getBasePtr();
1581 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1582 CurDAG->getRegister(0, MVT::i32), Chain };
1583 return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1590 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1591 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1592 SDLoc dl(V0.getNode());
1594 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1595 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1596 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1597 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1598 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1601 /// \brief Form a D register from a pair of S registers.
1602 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1603 SDLoc dl(V0.getNode());
1605 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1606 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1607 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1608 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1609 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1612 /// \brief Form a quad register from a pair of D registers.
1613 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1614 SDLoc dl(V0.getNode());
1615 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1617 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1618 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1619 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1620 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1623 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1624 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1625 SDLoc dl(V0.getNode());
1626 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1628 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1629 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1630 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1631 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1634 /// \brief Form 4 consecutive S registers.
1635 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1636 SDValue V2, SDValue V3) {
1637 SDLoc dl(V0.getNode());
1639 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1640 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1641 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1642 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1643 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1644 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1645 V2, SubReg2, V3, SubReg3 };
1646 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1649 /// \brief Form 4 consecutive D registers.
1650 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1651 SDValue V2, SDValue V3) {
1652 SDLoc dl(V0.getNode());
1653 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1655 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1656 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1657 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1658 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1659 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1660 V2, SubReg2, V3, SubReg3 };
1661 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1664 /// \brief Form 4 consecutive Q registers.
1665 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1666 SDValue V2, SDValue V3) {
1667 SDLoc dl(V0.getNode());
1668 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1670 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1671 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1672 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1673 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1674 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1675 V2, SubReg2, V3, SubReg3 };
1676 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1679 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1680 /// of a NEON VLD or VST instruction. The supported values depend on the
1681 /// number of registers being loaded.
1682 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, SDLoc dl,
1683 unsigned NumVecs, bool is64BitVector) {
1684 unsigned NumRegs = NumVecs;
1685 if (!is64BitVector && NumVecs < 3)
1688 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1689 if (Alignment >= 32 && NumRegs == 4)
1691 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1693 else if (Alignment >= 8)
1698 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1701 static bool isVLDfixed(unsigned Opc)
1704 default: return false;
1705 case ARM::VLD1d8wb_fixed : return true;
1706 case ARM::VLD1d16wb_fixed : return true;
1707 case ARM::VLD1d64Qwb_fixed : return true;
1708 case ARM::VLD1d32wb_fixed : return true;
1709 case ARM::VLD1d64wb_fixed : return true;
1710 case ARM::VLD1d64TPseudoWB_fixed : return true;
1711 case ARM::VLD1d64QPseudoWB_fixed : return true;
1712 case ARM::VLD1q8wb_fixed : return true;
1713 case ARM::VLD1q16wb_fixed : return true;
1714 case ARM::VLD1q32wb_fixed : return true;
1715 case ARM::VLD1q64wb_fixed : return true;
1716 case ARM::VLD2d8wb_fixed : return true;
1717 case ARM::VLD2d16wb_fixed : return true;
1718 case ARM::VLD2d32wb_fixed : return true;
1719 case ARM::VLD2q8PseudoWB_fixed : return true;
1720 case ARM::VLD2q16PseudoWB_fixed : return true;
1721 case ARM::VLD2q32PseudoWB_fixed : return true;
1722 case ARM::VLD2DUPd8wb_fixed : return true;
1723 case ARM::VLD2DUPd16wb_fixed : return true;
1724 case ARM::VLD2DUPd32wb_fixed : return true;
1728 static bool isVSTfixed(unsigned Opc)
1731 default: return false;
1732 case ARM::VST1d8wb_fixed : return true;
1733 case ARM::VST1d16wb_fixed : return true;
1734 case ARM::VST1d32wb_fixed : return true;
1735 case ARM::VST1d64wb_fixed : return true;
1736 case ARM::VST1q8wb_fixed : return true;
1737 case ARM::VST1q16wb_fixed : return true;
1738 case ARM::VST1q32wb_fixed : return true;
1739 case ARM::VST1q64wb_fixed : return true;
1740 case ARM::VST1d64TPseudoWB_fixed : return true;
1741 case ARM::VST1d64QPseudoWB_fixed : return true;
1742 case ARM::VST2d8wb_fixed : return true;
1743 case ARM::VST2d16wb_fixed : return true;
1744 case ARM::VST2d32wb_fixed : return true;
1745 case ARM::VST2q8PseudoWB_fixed : return true;
1746 case ARM::VST2q16PseudoWB_fixed : return true;
1747 case ARM::VST2q32PseudoWB_fixed : return true;
1751 // Get the register stride update opcode of a VLD/VST instruction that
1752 // is otherwise equivalent to the given fixed stride updating instruction.
1753 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1754 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1755 && "Incorrect fixed stride updating instruction.");
1758 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1759 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1760 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1761 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1762 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1763 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1764 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1765 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1766 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1767 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1768 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1769 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1771 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1772 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1773 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1774 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1775 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1776 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1777 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1778 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1779 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1780 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1782 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1783 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1784 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1785 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1786 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1787 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1789 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1790 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1791 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1792 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1793 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1794 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1796 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1797 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1798 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1800 return Opc; // If not one we handle, return it unchanged.
1803 SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1804 const uint16_t *DOpcodes,
1805 const uint16_t *QOpcodes0,
1806 const uint16_t *QOpcodes1) {
1807 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1810 SDValue MemAddr, Align;
1811 unsigned AddrOpIdx = isUpdating ? 1 : 2;
1812 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1815 SDValue Chain = N->getOperand(0);
1816 EVT VT = N->getValueType(0);
1817 bool is64BitVector = VT.is64BitVector();
1818 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1820 unsigned OpcodeIndex;
1821 switch (VT.getSimpleVT().SimpleTy) {
1822 default: llvm_unreachable("unhandled vld type");
1823 // Double-register operations:
1824 case MVT::v8i8: OpcodeIndex = 0; break;
1825 case MVT::v4i16: OpcodeIndex = 1; break;
1827 case MVT::v2i32: OpcodeIndex = 2; break;
1828 case MVT::v1i64: OpcodeIndex = 3; break;
1829 // Quad-register operations:
1830 case MVT::v16i8: OpcodeIndex = 0; break;
1831 case MVT::v8i16: OpcodeIndex = 1; break;
1833 case MVT::v4i32: OpcodeIndex = 2; break;
1835 case MVT::v2i64: OpcodeIndex = 3;
1836 assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1844 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1847 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1849 std::vector<EVT> ResTys;
1850 ResTys.push_back(ResTy);
1852 ResTys.push_back(MVT::i32);
1853 ResTys.push_back(MVT::Other);
1855 SDValue Pred = getAL(CurDAG, dl);
1856 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1858 SmallVector<SDValue, 7> Ops;
1860 // Double registers and VLD1/VLD2 quad registers are directly supported.
1861 if (is64BitVector || NumVecs <= 2) {
1862 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1863 QOpcodes0[OpcodeIndex]);
1864 Ops.push_back(MemAddr);
1865 Ops.push_back(Align);
1867 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1868 // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1869 // case entirely when the rest are updated to that form, too.
1870 if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
1871 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1872 // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1873 // check for that explicitly too. Horribly hacky, but temporary.
1874 if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
1875 !isa<ConstantSDNode>(Inc.getNode()))
1876 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1878 Ops.push_back(Pred);
1879 Ops.push_back(Reg0);
1880 Ops.push_back(Chain);
1881 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1884 // Otherwise, quad registers are loaded with two separate instructions,
1885 // where one loads the even registers and the other loads the odd registers.
1886 EVT AddrTy = MemAddr.getValueType();
1888 // Load the even subregs. This is always an updating load, so that it
1889 // provides the address to the second load for the odd subregs.
1891 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1892 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1893 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1894 ResTy, AddrTy, MVT::Other, OpsA);
1895 Chain = SDValue(VLdA, 2);
1897 // Load the odd subregs.
1898 Ops.push_back(SDValue(VLdA, 1));
1899 Ops.push_back(Align);
1901 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1902 assert(isa<ConstantSDNode>(Inc.getNode()) &&
1903 "only constant post-increment update allowed for VLD3/4");
1905 Ops.push_back(Reg0);
1907 Ops.push_back(SDValue(VLdA, 0));
1908 Ops.push_back(Pred);
1909 Ops.push_back(Reg0);
1910 Ops.push_back(Chain);
1911 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1914 // Transfer memoperands.
1915 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1916 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1917 cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1922 // Extract out the subregisters.
1923 SDValue SuperReg = SDValue(VLd, 0);
1924 assert(ARM::dsub_7 == ARM::dsub_0+7 &&
1925 ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
1926 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1927 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1928 ReplaceUses(SDValue(N, Vec),
1929 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1930 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1932 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
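// Select a VST1/VST2/VST3/VST4 intrinsic, optionally with address write-back.
// The source vectors are first glued into a single super-register with a
// REG_SEQUENCE; quad-register VST3/VST4 are again split into an updating store
// of the even D subregisters followed by a store of the odd ones.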
1936 SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1937 const uint16_t *DOpcodes,
1938 const uint16_t *QOpcodes0,
1939 const uint16_t *QOpcodes1) {
1940 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1943 SDValue MemAddr, Align;
1944 unsigned AddrOpIdx = isUpdating ? 1 : 2;
1945 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1946 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1949 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1950 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1952 SDValue Chain = N->getOperand(0);
1953 EVT VT = N->getOperand(Vec0Idx).getValueType();
1954 bool is64BitVector = VT.is64BitVector();
1955 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1957 unsigned OpcodeIndex;
1958 switch (VT.getSimpleVT().SimpleTy) {
1959 default: llvm_unreachable("unhandled vst type");
1960 // Double-register operations:
1961 case MVT::v8i8: OpcodeIndex = 0; break;
1962 case MVT::v4i16: OpcodeIndex = 1; break;
1964 case MVT::v2i32: OpcodeIndex = 2; break;
1965 case MVT::v1i64: OpcodeIndex = 3; break;
1966 // Quad-register operations:
1967 case MVT::v16i8: OpcodeIndex = 0; break;
1968 case MVT::v8i16: OpcodeIndex = 1; break;
1970 case MVT::v4i32: OpcodeIndex = 2; break;
1972 case MVT::v2i64: OpcodeIndex = 3;
1973 assert(NumVecs == 1 && "v2i64 type only supported for VST1");
1977 std::vector<EVT> ResTys;
1979 ResTys.push_back(MVT::i32);
1980 ResTys.push_back(MVT::Other);
1982 SDValue Pred = getAL(CurDAG, dl);
1983 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1984 SmallVector<SDValue, 7> Ops;
1986 // Double registers and VST1/VST2 quad registers are directly supported.
1987 if (is64BitVector || NumVecs <= 2) {
1990 SrcReg = N->getOperand(Vec0Idx);
1991 } else if (is64BitVector) {
1992 // Form a REG_SEQUENCE to force register allocation.
1993 SDValue V0 = N->getOperand(Vec0Idx + 0);
1994 SDValue V1 = N->getOperand(Vec0Idx + 1);
1996 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
1998 SDValue V2 = N->getOperand(Vec0Idx + 2);
1999 // If it's a vst3, form a quad D-register and leave the last part as an undef.
2001 SDValue V3 = (NumVecs == 3)
2002 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2003 : N->getOperand(Vec0Idx + 3);
2004 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2007 // Form a QQ register.
2008 SDValue Q0 = N->getOperand(Vec0Idx);
2009 SDValue Q1 = N->getOperand(Vec0Idx + 1);
2010 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2013 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2014 QOpcodes0[OpcodeIndex]);
2015 Ops.push_back(MemAddr);
2016 Ops.push_back(Align);
2018 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2019 // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2020 // case entirely when the rest are updated to that form, too.
2021 if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2022 Opc = getVLDSTRegisterUpdateOpcode(Opc);
2023 // FIXME: We use a VST1 for v1i64 even if the pseudo says vst2/3/4, so
2024 // check for that explicitly too. Horribly hacky, but temporary.
2025 if (!isa<ConstantSDNode>(Inc.getNode()))
2027 else if (NumVecs > 2 && !isVSTfixed(Opc))
2028 Ops.push_back(Reg0);
2030 Ops.push_back(SrcReg);
2031 Ops.push_back(Pred);
2032 Ops.push_back(Reg0);
2033 Ops.push_back(Chain);
2034 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2036 // Transfer memoperands.
2037 cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2042 // Otherwise, quad registers are stored with two separate instructions,
2043 // where one stores the even registers and the other stores the odd registers.
2045 // Form the QQQQ REG_SEQUENCE.
2046 SDValue V0 = N->getOperand(Vec0Idx + 0);
2047 SDValue V1 = N->getOperand(Vec0Idx + 1);
2048 SDValue V2 = N->getOperand(Vec0Idx + 2);
2049 SDValue V3 = (NumVecs == 3)
2050 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2051 : N->getOperand(Vec0Idx + 3);
2052 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2054 // Store the even D registers. This is always an updating store, so that it
2055 // provides the address to the second store for the odd subregs.
2056 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2057 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2058 MemAddr.getValueType(),
2060 cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
2061 Chain = SDValue(VStA, 1);
2063 // Store the odd D registers.
2064 Ops.push_back(SDValue(VStA, 0));
2065 Ops.push_back(Align);
2067 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2068 assert(isa<ConstantSDNode>(Inc.getNode()) &&
2069 "only constant post-increment update allowed for VST3/4");
2071 Ops.push_back(Reg0);
2073 Ops.push_back(RegSeq);
2074 Ops.push_back(Pred);
2075 Ops.push_back(Reg0);
2076 Ops.push_back(Chain);
2077 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2079 cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
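// Select a single-lane VLDn/VSTn (vldN.lane / vstN.lane). The vectors involved
// are packed into one super-register, the lane number is passed as an explicit
// immediate operand, and for loads the results are extracted back out with
// subregister extracts.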
2083 SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
2084 bool isUpdating, unsigned NumVecs,
2085 const uint16_t *DOpcodes,
2086 const uint16_t *QOpcodes) {
2087 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2090 SDValue MemAddr, Align;
2091 unsigned AddrOpIdx = isUpdating ? 1 : 2;
2092 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2093 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2096 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2097 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2099 SDValue Chain = N->getOperand(0);
2101 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2102 EVT VT = N->getOperand(Vec0Idx).getValueType();
2103 bool is64BitVector = VT.is64BitVector();
2105 unsigned Alignment = 0;
2107 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2108 unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2109 if (Alignment > NumBytes)
2110 Alignment = NumBytes;
2111 if (Alignment < 8 && Alignment < NumBytes)
2113 // Alignment must be a power of two; make sure of that.
2114 Alignment = (Alignment & -Alignment);
2118 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2120 unsigned OpcodeIndex;
2121 switch (VT.getSimpleVT().SimpleTy) {
2122 default: llvm_unreachable("unhandled vld/vst lane type");
2123 // Double-register operations:
2124 case MVT::v8i8: OpcodeIndex = 0; break;
2125 case MVT::v4i16: OpcodeIndex = 1; break;
2127 case MVT::v2i32: OpcodeIndex = 2; break;
2128 // Quad-register operations:
2129 case MVT::v8i16: OpcodeIndex = 0; break;
2131 case MVT::v4i32: OpcodeIndex = 1; break;
2134 std::vector<EVT> ResTys;
2136 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2139 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2140 MVT::i64, ResTyElts));
2143 ResTys.push_back(MVT::i32);
2144 ResTys.push_back(MVT::Other);
2146 SDValue Pred = getAL(CurDAG, dl);
2147 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2149 SmallVector<SDValue, 8> Ops;
2150 Ops.push_back(MemAddr);
2151 Ops.push_back(Align);
2153 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2154 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
2158 SDValue V0 = N->getOperand(Vec0Idx + 0);
2159 SDValue V1 = N->getOperand(Vec0Idx + 1);
2162 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2164 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2166 SDValue V2 = N->getOperand(Vec0Idx + 2);
2167 SDValue V3 = (NumVecs == 3)
2168 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2169 : N->getOperand(Vec0Idx + 3);
2171 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2173 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2175 Ops.push_back(SuperReg);
2176 Ops.push_back(getI32Imm(Lane, dl));
2177 Ops.push_back(Pred);
2178 Ops.push_back(Reg0);
2179 Ops.push_back(Chain);
2181 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2182 QOpcodes[OpcodeIndex]);
2183 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2184 cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2188 // Extract the subregisters.
2189 SuperReg = SDValue(VLdLn, 0);
2190 assert(ARM::dsub_7 == ARM::dsub_0+7 &&
2191 ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
2192 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2193 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2194 ReplaceUses(SDValue(N, Vec),
2195 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2196 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2198 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
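// Select a VLDnDUP node: load one element and replicate it into every lane of
// NumVecs registers. The opcode tables passed in are D-register forms, so the
// result vectors are always extracted from dsub_0 upward.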
2202 SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
2204 const uint16_t *Opcodes) {
2205 assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2208 SDValue MemAddr, Align;
2209 if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2212 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2213 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2215 SDValue Chain = N->getOperand(0);
2216 EVT VT = N->getValueType(0);
2218 unsigned Alignment = 0;
2220 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2221 unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2222 if (Alignment > NumBytes)
2223 Alignment = NumBytes;
2224 if (Alignment < 8 && Alignment < NumBytes)
2226 // Alignment must be a power of two; make sure of that.
2227 Alignment = (Alignment & -Alignment);
2231 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2233 unsigned OpcodeIndex;
2234 switch (VT.getSimpleVT().SimpleTy) {
2235 default: llvm_unreachable("unhandled vld-dup type");
2236 case MVT::v8i8: OpcodeIndex = 0; break;
2237 case MVT::v4i16: OpcodeIndex = 1; break;
2239 case MVT::v2i32: OpcodeIndex = 2; break;
2242 SDValue Pred = getAL(CurDAG, dl);
2243 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2245 unsigned Opc = Opcodes[OpcodeIndex];
2246 SmallVector<SDValue, 6> Ops;
2247 Ops.push_back(MemAddr);
2248 Ops.push_back(Align);
2250 // fixed-stride update instructions don't have an explicit writeback
2251 // operand. It's implicit in the opcode itself.
2252 SDValue Inc = N->getOperand(2);
2253 if (!isa<ConstantSDNode>(Inc.getNode()))
2255 // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2256 else if (NumVecs > 2)
2257 Ops.push_back(Reg0);
2259 Ops.push_back(Pred);
2260 Ops.push_back(Reg0);
2261 Ops.push_back(Chain);
2263 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2264 std::vector<EVT> ResTys;
2265 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2267 ResTys.push_back(MVT::i32);
2268 ResTys.push_back(MVT::Other);
2269 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2270 cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2271 SuperReg = SDValue(VLdDup, 0);
2273 // Extract the subregisters.
2274 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2275 unsigned SubIdx = ARM::dsub_0;
2276 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2277 ReplaceUses(SDValue(N, Vec),
2278 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2279 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2281 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
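// Select a VTBL/VTBX table-lookup node. The table registers are packed into a
// D-register pair or quad with a REG_SEQUENCE before being passed to the
// VTBLn/VTBXn (pseudo-)instruction together with the index vector and the
// usual predicate operands.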
2285 SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
2287 assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
2289 EVT VT = N->getValueType(0);
2290 unsigned FirstTblReg = IsExt ? 2 : 1;
2292 // Form a REG_SEQUENCE to force register allocation.
2294 SDValue V0 = N->getOperand(FirstTblReg + 0);
2295 SDValue V1 = N->getOperand(FirstTblReg + 1);
2297 RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
2299 SDValue V2 = N->getOperand(FirstTblReg + 2);
2300 // If it's a vtbl3, form a quad D-register and leave the last part as undef.
2302 SDValue V3 = (NumVecs == 3)
2303 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2304 : N->getOperand(FirstTblReg + 3);
2305 RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2308 SmallVector<SDValue, 6> Ops;
2310 Ops.push_back(N->getOperand(1));
2311 Ops.push_back(RegSeq);
2312 Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2313 Ops.push_back(getAL(CurDAG, dl)); // predicate
2314 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2315 return CurDAG->getMachineNode(Opc, dl, VT, Ops);
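// Try to match N as a v6T2 bitfield extract (SBFX/UBFX, or t2SBFX/t2UBFX in
// Thumb2). Three shapes are recognised: an AND with a low-bits mask of a right
// shift, a left shift followed by a right shift, and a sign_extend_inreg of a
// right shift. When the extracted field reaches the top of the register, a
// plain shift is cheaper and is used instead.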
2318 SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
2320 if (!Subtarget->hasV6T2Ops())
2323 unsigned Opc = isSigned
2324 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2325 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2328 // For unsigned extracts, check for a shift right and mask
2329 unsigned And_imm = 0;
2330 if (N->getOpcode() == ISD::AND) {
2331 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2333 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2334 if (And_imm & (And_imm + 1))
2337 unsigned Srl_imm = 0;
2338 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2340 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2342 // Note: The width operand is encoded as width-1.
2343 unsigned Width = countTrailingOnes(And_imm) - 1;
2344 unsigned LSB = Srl_imm;
2346 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2348 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2349 // It's cheaper to use a right shift to extract the top bits.
2350 if (Subtarget->isThumb()) {
2351 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2352 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2353 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2354 getAL(CurDAG, dl), Reg0, Reg0 };
2355 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2358 // ARM models shift instructions as MOVsi with shifter operand.
2359 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2361 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2363 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2364 getAL(CurDAG, dl), Reg0, Reg0 };
2365 return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2368 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2369 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2370 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2371 getAL(CurDAG, dl), Reg0 };
2372 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2378 // Otherwise, we're looking for a shift of a shift
2379 unsigned Shl_imm = 0;
2380 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2381 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2382 unsigned Srl_imm = 0;
2383 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2384 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2385 // Note: The width operand is encoded as width-1.
2386 unsigned Width = 32 - Srl_imm - 1;
2387 int LSB = Srl_imm - Shl_imm;
2390 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2391 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2392 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2393 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2394 getAL(CurDAG, dl), Reg0 };
2395 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2399 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2400 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2402 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2403 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2406 if (LSB + Width > 32)
2409 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2410 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2411 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2412 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2413 getAL(CurDAG, dl), Reg0 };
2414 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2420 /// Target-specific DAG combining for ISD::XOR.
2421 /// Target-independent combining lowers SELECT_CC nodes of the form
2422 /// select_cc setg[ge] X, 0, X, -X
2423 /// select_cc setgt X, -1, X, -X
2424 /// select_cc setl[te] X, 0, -X, X
2425 /// select_cc setlt X, 1, -X, X
2426 /// which represent Integer ABS into:
2427 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2428 /// ARM instruction selection detects the latter form and matches it to an
2429 /// ARM::ABS or ARM::t2ABS machine node.
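/// For a 32-bit X this instantiates to the familiar branch-free sequence
///   Y = sra(X, 31);  abs(X) = xor(add(X, Y), Y)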
2430 SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
2431 SDValue XORSrc0 = N->getOperand(0);
2432 SDValue XORSrc1 = N->getOperand(1);
2433 EVT VT = N->getValueType(0);
2435 if (Subtarget->isThumb1Only())
2438 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2441 SDValue ADDSrc0 = XORSrc0.getOperand(0);
2442 SDValue ADDSrc1 = XORSrc0.getOperand(1);
2443 SDValue SRASrc0 = XORSrc1.getOperand(0);
2444 SDValue SRASrc1 = XORSrc1.getOperand(1);
2445 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2446 EVT XType = SRASrc0.getValueType();
2447 unsigned Size = XType.getSizeInBits() - 1;
2449 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2450 XType.isInteger() && SRAConstant != nullptr &&
2451 Size == SRAConstant->getZExtValue()) {
2452 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2453 return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2459 SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2460 // The only time a CONCAT_VECTORS operation can have legal types is when
2461 // two 64-bit vectors are concatenated to a 128-bit vector.
2462 EVT VT = N->getValueType(0);
2463 if (!VT.is128BitVector() || N->getNumOperands() != 2)
2464 llvm_unreachable("unexpected CONCAT_VECTORS");
2465 return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
2468 SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
2471 if (N->isMachineOpcode()) {
2473 return nullptr; // Already selected.
2476 switch (N->getOpcode()) {
2478 case ISD::WRITE_REGISTER: {
2479 SDNode *ResNode = SelectWriteRegister(N);
2484 case ISD::READ_REGISTER: {
2485 SDNode *ResNode = SelectReadRegister(N);
2490 case ISD::INLINEASM: {
2491 SDNode *ResNode = SelectInlineAsm(N);
2497 // Select special operations if XOR node forms integer ABS pattern
2498 SDNode *ResNode = SelectABSOp(N);
2501 // Other cases are autogenerated.
2504 case ISD::Constant: {
2505 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2507 if (Subtarget->useMovt(*MF))
2508 // Thumb2-aware targets have the MOVT instruction, so all immediates can
2509 // be done with MOV + MOVT, at worst.
2512 if (Subtarget->isThumb()) {
2513 UseCP = (Val > 255 && // MOV
2514 ~Val > 255 && // MOV + MVN
2515 !ARM_AM::isThumbImmShiftedVal(Val) && // MOV + LSL
2516 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
2518 UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
2519 ARM_AM::getSOImmVal(~Val) == -1 && // MVN
2520 !ARM_AM::isSOImmTwoPartVal(Val) && // two instrs.
2521 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
2525 SDValue CPIdx = CurDAG->getTargetConstantPool(
2526 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2527 TLI->getPointerTy(CurDAG->getDataLayout()));
2530 if (Subtarget->isThumb()) {
2531 SDValue Pred = getAL(CurDAG, dl);
2532 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2533 SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2534 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2539 CurDAG->getTargetConstant(0, dl, MVT::i32),
2541 CurDAG->getRegister(0, MVT::i32),
2542 CurDAG->getEntryNode()
2544 ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2547 ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
2551 // Other cases are autogenerated.
2554 case ISD::FrameIndex: {
2555 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2556 int FI = cast<FrameIndexSDNode>(N)->getIndex();
2557 SDValue TFI = CurDAG->getTargetFrameIndex(
2558 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2559 if (Subtarget->isThumb1Only()) {
2560 // Set the alignment of the frame object to 4, to avoid having to generate
2561 // more than one ADD
2562 MachineFrameInfo *MFI = MF->getFrameInfo();
2563 if (MFI->getObjectAlignment(FI) < 4)
2564 MFI->setObjectAlignment(FI, 4);
2565 return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2566 CurDAG->getTargetConstant(0, dl, MVT::i32));
2568 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2569 ARM::t2ADDri : ARM::ADDri);
2570 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2571 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2572 CurDAG->getRegister(0, MVT::i32) };
2573 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2577 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2580 case ISD::SIGN_EXTEND_INREG:
2582 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
2586 if (Subtarget->isThumb1Only())
2588 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2589 unsigned RHSV = C->getZExtValue();
2591 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2592 unsigned ShImm = Log2_32(RHSV-1);
2595 SDValue V = N->getOperand(0);
2596 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2597 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2598 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2599 if (Subtarget->isThumb()) {
2600 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2601 return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2603 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2605 return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2608 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2609 unsigned ShImm = Log2_32(RHSV+1);
2612 SDValue V = N->getOperand(0);
2613 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2614 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2615 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2616 if (Subtarget->isThumb()) {
2617 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2618 return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2620 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2622 return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2628 // Check for unsigned bitfield extract
2629 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2632 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2633 // of c1 are 0xffff, and lower 16-bits of c2 are 0. That is, the top 16-bits
2634 // are entirely contributed by c2 and lower 16-bits are entirely contributed
2635 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2636 // Select it to: "movt x, ((c1 & 0xffff0000) >> 16)"
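    // For example, (and (or x, 0xAAAA0000), 0xAAAAFFFF) is selected to
    // "movt x, #0xAAAA", leaving the low 16 bits of x unchanged.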
2637 EVT VT = N->getValueType(0);
2640 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2642 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2645 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2646 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2649 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2650 SDValue N2 = N0.getOperand(1);
2651 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2654 unsigned N1CVal = N1C->getZExtValue();
2655 unsigned N2CVal = N2C->getZExtValue();
2656 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2657 (N1CVal & 0xffffU) == 0xffffU &&
2658 (N2CVal & 0xffffU) == 0x0U) {
2659 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2661 SDValue Ops[] = { N0.getOperand(0), Imm16,
2662 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2663 return CurDAG->getMachineNode(Opc, dl, VT, Ops);
2668 case ARMISD::VMOVRRD:
2669 return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
2670 N->getOperand(0), getAL(CurDAG, dl),
2671 CurDAG->getRegister(0, MVT::i32));
2672 case ISD::UMUL_LOHI: {
2673 if (Subtarget->isThumb1Only())
2675 if (Subtarget->isThumb()) {
2676 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2677 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2678 return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
2680 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2681 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2682 CurDAG->getRegister(0, MVT::i32) };
2683 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2684 ARM::UMULL : ARM::UMULLv5,
2685 dl, MVT::i32, MVT::i32, Ops);
2688 case ISD::SMUL_LOHI: {
2689 if (Subtarget->isThumb1Only())
2691 if (Subtarget->isThumb()) {
2692 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2693 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2694 return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
2696 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2697 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2698 CurDAG->getRegister(0, MVT::i32) };
2699 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2700 ARM::SMULL : ARM::SMULLv5,
2701 dl, MVT::i32, MVT::i32, Ops);
2704 case ARMISD::UMLAL:{
2705 if (Subtarget->isThumb()) {
2706 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2707 N->getOperand(3), getAL(CurDAG, dl),
2708 CurDAG->getRegister(0, MVT::i32)};
2709 return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
2711 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2712 N->getOperand(3), getAL(CurDAG, dl),
2713 CurDAG->getRegister(0, MVT::i32),
2714 CurDAG->getRegister(0, MVT::i32) };
2715 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2716 ARM::UMLAL : ARM::UMLALv5,
2717 dl, MVT::i32, MVT::i32, Ops);
2720 case ARMISD::SMLAL:{
2721 if (Subtarget->isThumb()) {
2722 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2723 N->getOperand(3), getAL(CurDAG, dl),
2724 CurDAG->getRegister(0, MVT::i32)};
2725 return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
2727 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2728 N->getOperand(3), getAL(CurDAG, dl),
2729 CurDAG->getRegister(0, MVT::i32),
2730 CurDAG->getRegister(0, MVT::i32) };
2731 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2732 ARM::SMLAL : ARM::SMLALv5,
2733 dl, MVT::i32, MVT::i32, Ops);
2737 SDNode *ResNode = nullptr;
2738 if (Subtarget->isThumb() && Subtarget->hasThumb2())
2739 ResNode = SelectT2IndexedLoad(N);
2741 ResNode = SelectARMIndexedLoad(N);
2744 // Other cases are autogenerated.
2747 case ARMISD::BRCOND: {
2748 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2749 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2750 // Pattern complexity = 6 cost = 1 size = 0
2752 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2753 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2754 // Pattern complexity = 6 cost = 1 size = 0
2756 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2757 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2758 // Pattern complexity = 6 cost = 1 size = 0
2760 unsigned Opc = Subtarget->isThumb() ?
2761 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2762 SDValue Chain = N->getOperand(0);
2763 SDValue N1 = N->getOperand(1);
2764 SDValue N2 = N->getOperand(2);
2765 SDValue N3 = N->getOperand(3);
2766 SDValue InFlag = N->getOperand(4);
2767 assert(N1.getOpcode() == ISD::BasicBlock);
2768 assert(N2.getOpcode() == ISD::Constant);
2769 assert(N3.getOpcode() == ISD::Register);
2771 SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
2772 cast<ConstantSDNode>(N2)->getZExtValue()), dl,
2774 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
2775 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
2777 Chain = SDValue(ResNode, 0);
2778 if (N->getNumValues() == 2) {
2779 InFlag = SDValue(ResNode, 1);
2780 ReplaceUses(SDValue(N, 1), InFlag);
2782 ReplaceUses(SDValue(N, 0),
2783 SDValue(Chain.getNode(), Chain.getResNo()));
2786 case ARMISD::VZIP: {
2788 EVT VT = N->getValueType(0);
2789 switch (VT.getSimpleVT().SimpleTy) {
2790 default: return nullptr;
2791 case MVT::v8i8: Opc = ARM::VZIPd8; break;
2792 case MVT::v4i16: Opc = ARM::VZIPd16; break;
2794 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2795 case MVT::v2i32: Opc = ARM::VTRNd32; break;
2796 case MVT::v16i8: Opc = ARM::VZIPq8; break;
2797 case MVT::v8i16: Opc = ARM::VZIPq16; break;
2799 case MVT::v4i32: Opc = ARM::VZIPq32; break;
2801 SDValue Pred = getAL(CurDAG, dl);
2802 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2803 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2804 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2806 case ARMISD::VUZP: {
2808 EVT VT = N->getValueType(0);
2809 switch (VT.getSimpleVT().SimpleTy) {
2810 default: return nullptr;
2811 case MVT::v8i8: Opc = ARM::VUZPd8; break;
2812 case MVT::v4i16: Opc = ARM::VUZPd16; break;
2814 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2815 case MVT::v2i32: Opc = ARM::VTRNd32; break;
2816 case MVT::v16i8: Opc = ARM::VUZPq8; break;
2817 case MVT::v8i16: Opc = ARM::VUZPq16; break;
2819 case MVT::v4i32: Opc = ARM::VUZPq32; break;
2821 SDValue Pred = getAL(CurDAG, dl);
2822 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2823 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2824 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2826 case ARMISD::VTRN: {
2828 EVT VT = N->getValueType(0);
2829 switch (VT.getSimpleVT().SimpleTy) {
2830 default: return nullptr;
2831 case MVT::v8i8: Opc = ARM::VTRNd8; break;
2832 case MVT::v4i16: Opc = ARM::VTRNd16; break;
2834 case MVT::v2i32: Opc = ARM::VTRNd32; break;
2835 case MVT::v16i8: Opc = ARM::VTRNq8; break;
2836 case MVT::v8i16: Opc = ARM::VTRNq16; break;
2838 case MVT::v4i32: Opc = ARM::VTRNq32; break;
2840 SDValue Pred = getAL(CurDAG, dl);
2841 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2842 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2843 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2845 case ARMISD::BUILD_VECTOR: {
2846 EVT VecVT = N->getValueType(0);
2847 EVT EltVT = VecVT.getVectorElementType();
2848 unsigned NumElts = VecVT.getVectorNumElements();
2849 if (EltVT == MVT::f64) {
2850 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
2851 return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2853 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
2855 return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2856 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
2857 return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
2858 N->getOperand(2), N->getOperand(3));
2861 case ARMISD::VLD2DUP: {
2862 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
2864 return SelectVLDDup(N, false, 2, Opcodes);
2867 case ARMISD::VLD3DUP: {
2868 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
2869 ARM::VLD3DUPd16Pseudo,
2870 ARM::VLD3DUPd32Pseudo };
2871 return SelectVLDDup(N, false, 3, Opcodes);
2874 case ARMISD::VLD4DUP: {
2875 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
2876 ARM::VLD4DUPd16Pseudo,
2877 ARM::VLD4DUPd32Pseudo };
2878 return SelectVLDDup(N, false, 4, Opcodes);
2881 case ARMISD::VLD2DUP_UPD: {
2882 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
2883 ARM::VLD2DUPd16wb_fixed,
2884 ARM::VLD2DUPd32wb_fixed };
2885 return SelectVLDDup(N, true, 2, Opcodes);
2888 case ARMISD::VLD3DUP_UPD: {
2889 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
2890 ARM::VLD3DUPd16Pseudo_UPD,
2891 ARM::VLD3DUPd32Pseudo_UPD };
2892 return SelectVLDDup(N, true, 3, Opcodes);
2895 case ARMISD::VLD4DUP_UPD: {
2896 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
2897 ARM::VLD4DUPd16Pseudo_UPD,
2898 ARM::VLD4DUPd32Pseudo_UPD };
2899 return SelectVLDDup(N, true, 4, Opcodes);
2902 case ARMISD::VLD1_UPD: {
2903 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
2904 ARM::VLD1d16wb_fixed,
2905 ARM::VLD1d32wb_fixed,
2906 ARM::VLD1d64wb_fixed };
2907 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
2908 ARM::VLD1q16wb_fixed,
2909 ARM::VLD1q32wb_fixed,
2910 ARM::VLD1q64wb_fixed };
2911 return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
2914 case ARMISD::VLD2_UPD: {
2915 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
2916 ARM::VLD2d16wb_fixed,
2917 ARM::VLD2d32wb_fixed,
2918 ARM::VLD1q64wb_fixed};
2919 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
2920 ARM::VLD2q16PseudoWB_fixed,
2921 ARM::VLD2q32PseudoWB_fixed };
2922 return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
2925 case ARMISD::VLD3_UPD: {
2926 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
2927 ARM::VLD3d16Pseudo_UPD,
2928 ARM::VLD3d32Pseudo_UPD,
2929 ARM::VLD1d64TPseudoWB_fixed};
2930 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
2931 ARM::VLD3q16Pseudo_UPD,
2932 ARM::VLD3q32Pseudo_UPD };
2933 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
2934 ARM::VLD3q16oddPseudo_UPD,
2935 ARM::VLD3q32oddPseudo_UPD };
2936 return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
2939 case ARMISD::VLD4_UPD: {
2940 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
2941 ARM::VLD4d16Pseudo_UPD,
2942 ARM::VLD4d32Pseudo_UPD,
2943 ARM::VLD1d64QPseudoWB_fixed};
2944 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
2945 ARM::VLD4q16Pseudo_UPD,
2946 ARM::VLD4q32Pseudo_UPD };
2947 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
2948 ARM::VLD4q16oddPseudo_UPD,
2949 ARM::VLD4q32oddPseudo_UPD };
2950 return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
2953 case ARMISD::VLD2LN_UPD: {
2954 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
2955 ARM::VLD2LNd16Pseudo_UPD,
2956 ARM::VLD2LNd32Pseudo_UPD };
2957 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
2958 ARM::VLD2LNq32Pseudo_UPD };
2959 return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
2962 case ARMISD::VLD3LN_UPD: {
2963 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
2964 ARM::VLD3LNd16Pseudo_UPD,
2965 ARM::VLD3LNd32Pseudo_UPD };
2966 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
2967 ARM::VLD3LNq32Pseudo_UPD };
2968 return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
2971 case ARMISD::VLD4LN_UPD: {
2972 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
2973 ARM::VLD4LNd16Pseudo_UPD,
2974 ARM::VLD4LNd32Pseudo_UPD };
2975 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
2976 ARM::VLD4LNq32Pseudo_UPD };
2977 return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
2980 case ARMISD::VST1_UPD: {
2981 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
2982 ARM::VST1d16wb_fixed,
2983 ARM::VST1d32wb_fixed,
2984 ARM::VST1d64wb_fixed };
2985 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
2986 ARM::VST1q16wb_fixed,
2987 ARM::VST1q32wb_fixed,
2988 ARM::VST1q64wb_fixed };
2989 return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
2992 case ARMISD::VST2_UPD: {
2993 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
2994 ARM::VST2d16wb_fixed,
2995 ARM::VST2d32wb_fixed,
2996 ARM::VST1q64wb_fixed};
2997 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
2998 ARM::VST2q16PseudoWB_fixed,
2999 ARM::VST2q32PseudoWB_fixed };
3000 return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3003 case ARMISD::VST3_UPD: {
3004 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3005 ARM::VST3d16Pseudo_UPD,
3006 ARM::VST3d32Pseudo_UPD,
3007 ARM::VST1d64TPseudoWB_fixed};
3008 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3009 ARM::VST3q16Pseudo_UPD,
3010 ARM::VST3q32Pseudo_UPD };
3011 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3012 ARM::VST3q16oddPseudo_UPD,
3013 ARM::VST3q32oddPseudo_UPD };
3014 return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3017 case ARMISD::VST4_UPD: {
3018 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3019 ARM::VST4d16Pseudo_UPD,
3020 ARM::VST4d32Pseudo_UPD,
3021 ARM::VST1d64QPseudoWB_fixed};
3022 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3023 ARM::VST4q16Pseudo_UPD,
3024 ARM::VST4q32Pseudo_UPD };
3025 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3026 ARM::VST4q16oddPseudo_UPD,
3027 ARM::VST4q32oddPseudo_UPD };
3028 return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3031 case ARMISD::VST2LN_UPD: {
3032 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3033 ARM::VST2LNd16Pseudo_UPD,
3034 ARM::VST2LNd32Pseudo_UPD };
3035 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3036 ARM::VST2LNq32Pseudo_UPD };
3037 return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3040 case ARMISD::VST3LN_UPD: {
3041 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3042 ARM::VST3LNd16Pseudo_UPD,
3043 ARM::VST3LNd32Pseudo_UPD };
3044 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3045 ARM::VST3LNq32Pseudo_UPD };
3046 return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3049 case ARMISD::VST4LN_UPD: {
3050 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3051 ARM::VST4LNd16Pseudo_UPD,
3052 ARM::VST4LNd32Pseudo_UPD };
3053 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3054 ARM::VST4LNq32Pseudo_UPD };
3055 return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3058 case ISD::INTRINSIC_VOID:
3059 case ISD::INTRINSIC_W_CHAIN: {
3060 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3065 case Intrinsic::arm_ldaexd:
3066 case Intrinsic::arm_ldrexd: {
3068 SDValue Chain = N->getOperand(0);
3069 SDValue MemAddr = N->getOperand(2);
3070 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3072 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3073 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3074 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3076 // arm_ldrexd returns an i64 value in {i32, i32}
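      // In Thumb2 mode t2LDREXD/t2LDAEXD produce two separate i32 results; the
      // ARM-mode LDREXD/LDAEXD forms produce a single Untyped GPRPair result
      // that is split into its halves with EXTRACT_SUBREG (gsub_0/gsub_1) below.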
3077 std::vector<EVT> ResTys;
3079 ResTys.push_back(MVT::i32);
3080 ResTys.push_back(MVT::i32);
3082 ResTys.push_back(MVT::Untyped);
3083 ResTys.push_back(MVT::Other);
3085 // Place arguments in the right order.
3086 SmallVector<SDValue, 7> Ops;
3087 Ops.push_back(MemAddr);
3088 Ops.push_back(getAL(CurDAG, dl));
3089 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3090 Ops.push_back(Chain);
3091 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3092 // Transfer memoperands.
3093 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3094 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3095 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3098 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3099 if (!SDValue(N, 0).use_empty()) {
3102 Result = SDValue(Ld, 0);
3105 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3106 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3107 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3108 Result = SDValue(ResNode,0);
3110 ReplaceUses(SDValue(N, 0), Result);
3112 if (!SDValue(N, 1).use_empty()) {
3115 Result = SDValue(Ld, 1);
3118 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3119 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3120 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3121 Result = SDValue(ResNode,0);
3123 ReplaceUses(SDValue(N, 1), Result);
3125 ReplaceUses(SDValue(N, 2), OutChain);
3128 case Intrinsic::arm_stlexd:
3129 case Intrinsic::arm_strexd: {
3131 SDValue Chain = N->getOperand(0);
3132 SDValue Val0 = N->getOperand(2);
3133 SDValue Val1 = N->getOperand(3);
3134 SDValue MemAddr = N->getOperand(4);
3136 // Store exclusive double returns an i32 value which is the return status
3137 // of the issued store.
3138 const EVT ResTys[] = {MVT::i32, MVT::Other};
3140 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3141 // Place arguments in the right order.
3142 SmallVector<SDValue, 7> Ops;
3144 Ops.push_back(Val0);
3145 Ops.push_back(Val1);
3147 // arm_strexd uses GPRPair.
3148 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3149 Ops.push_back(MemAddr);
3150 Ops.push_back(getAL(CurDAG, dl));
3151 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3152 Ops.push_back(Chain);
3154 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3155 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3156 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3158 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3159 // Transfer memoperands.
3160 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3161 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3162 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3167 case Intrinsic::arm_neon_vld1: {
3168 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3169 ARM::VLD1d32, ARM::VLD1d64 };
3170 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3171 ARM::VLD1q32, ARM::VLD1q64};
3172 return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3175 case Intrinsic::arm_neon_vld2: {
3176 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3177 ARM::VLD2d32, ARM::VLD1q64 };
3178 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3179 ARM::VLD2q32Pseudo };
3180 return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3183 case Intrinsic::arm_neon_vld3: {
3184 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3187 ARM::VLD1d64TPseudo };
3188 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3189 ARM::VLD3q16Pseudo_UPD,
3190 ARM::VLD3q32Pseudo_UPD };
3191 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3192 ARM::VLD3q16oddPseudo,
3193 ARM::VLD3q32oddPseudo };
3194 return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3197 case Intrinsic::arm_neon_vld4: {
3198 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3201 ARM::VLD1d64QPseudo };
3202 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3203 ARM::VLD4q16Pseudo_UPD,
3204 ARM::VLD4q32Pseudo_UPD };
3205 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3206 ARM::VLD4q16oddPseudo,
3207 ARM::VLD4q32oddPseudo };
3208 return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3211 case Intrinsic::arm_neon_vld2lane: {
3212 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3213 ARM::VLD2LNd16Pseudo,
3214 ARM::VLD2LNd32Pseudo };
3215 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3216 ARM::VLD2LNq32Pseudo };
3217 return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3220 case Intrinsic::arm_neon_vld3lane: {
3221 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3222 ARM::VLD3LNd16Pseudo,
3223 ARM::VLD3LNd32Pseudo };
3224 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3225 ARM::VLD3LNq32Pseudo };
3226 return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3229 case Intrinsic::arm_neon_vld4lane: {
3230 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3231 ARM::VLD4LNd16Pseudo,
3232 ARM::VLD4LNd32Pseudo };
3233 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3234 ARM::VLD4LNq32Pseudo };
3235 return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3238 case Intrinsic::arm_neon_vst1: {
3239 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3240 ARM::VST1d32, ARM::VST1d64 };
3241 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3242 ARM::VST1q32, ARM::VST1q64 };
3243 return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3246 case Intrinsic::arm_neon_vst2: {
3247 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3248 ARM::VST2d32, ARM::VST1q64 };
3249 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3250 ARM::VST2q32Pseudo };
3251 return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3254 case Intrinsic::arm_neon_vst3: {
3255 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3258 ARM::VST1d64TPseudo };
3259 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3260 ARM::VST3q16Pseudo_UPD,
3261 ARM::VST3q32Pseudo_UPD };
3262 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3263 ARM::VST3q16oddPseudo,
3264 ARM::VST3q32oddPseudo };
3265 return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3268 case Intrinsic::arm_neon_vst4: {
3269 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3272 ARM::VST1d64QPseudo };
3273 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3274 ARM::VST4q16Pseudo_UPD,
3275 ARM::VST4q32Pseudo_UPD };
3276 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3277 ARM::VST4q16oddPseudo,
3278 ARM::VST4q32oddPseudo };
3279 return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3282 case Intrinsic::arm_neon_vst2lane: {
3283 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3284 ARM::VST2LNd16Pseudo,
3285 ARM::VST2LNd32Pseudo };
3286 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3287 ARM::VST2LNq32Pseudo };
3288 return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3291 case Intrinsic::arm_neon_vst3lane: {
3292 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3293 ARM::VST3LNd16Pseudo,
3294 ARM::VST3LNd32Pseudo };
3295 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3296 ARM::VST3LNq32Pseudo };
3297 return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3300 case Intrinsic::arm_neon_vst4lane: {
3301 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3302 ARM::VST4LNd16Pseudo,
3303 ARM::VST4LNd32Pseudo };
3304 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3305 ARM::VST4LNq32Pseudo };
3306 return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3312 case ISD::INTRINSIC_WO_CHAIN: {
3313 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3318 case Intrinsic::arm_neon_vtbl2:
3319 return SelectVTBL(N, false, 2, ARM::VTBL2);
3320 case Intrinsic::arm_neon_vtbl3:
3321 return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3322 case Intrinsic::arm_neon_vtbl4:
3323 return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3325 case Intrinsic::arm_neon_vtbx2:
3326 return SelectVTBL(N, true, 2, ARM::VTBX2);
3327 case Intrinsic::arm_neon_vtbx3:
3328 return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3329 case Intrinsic::arm_neon_vtbx4:
3330 return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3335 case ARMISD::VTBL1: {
3337 EVT VT = N->getValueType(0);
3338 SmallVector<SDValue, 6> Ops;
3340 Ops.push_back(N->getOperand(0));
3341 Ops.push_back(N->getOperand(1));
3342 Ops.push_back(getAL(CurDAG, dl)); // Predicate
3343 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3344 return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
3346 case ARMISD::VTBL2: {
3348 EVT VT = N->getValueType(0);
3350 // Form a REG_SEQUENCE to force register allocation.
3351 SDValue V0 = N->getOperand(0);
3352 SDValue V1 = N->getOperand(1);
3353 SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
3355 SmallVector<SDValue, 6> Ops;
3356 Ops.push_back(RegSeq);
3357 Ops.push_back(N->getOperand(2));
3358 Ops.push_back(getAL(CurDAG, dl)); // Predicate
3359 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3360 return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
3363 case ISD::CONCAT_VECTORS:
3364 return SelectConcatVector(N);
3367 return SelectCode(N);
3370 // Inspect a register string of the form
3371 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32 bit) or
3372 // cp<coprocessor>:<opc1>:c<CRm> (64 bit), extract the integer fields from
3373 // it, and append them as target constants to the provided operand vector.
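// For example, "cp15:0:c13:c0:3" yields the five i32 target constants
// {15, 0, 13, 0, 3}.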
3375 static void getIntOperandsFromRegisterString(StringRef RegString,
3376 SelectionDAG *CurDAG, SDLoc DL,
3377 std::vector<SDValue>& Ops) {
3378 SmallVector<StringRef, 5> Fields;
3379 RegString.split(Fields, ":");
3381 if (Fields.size() > 1) {
3382 bool AllIntFields = true;
3384 for (StringRef Field : Fields) {
3385 // Need to trim out leading 'cp' characters and get the integer field.
3387 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3388 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3391 assert(AllIntFields &&
3392 "Unexpected non-integer value in special register string.");
3396 // Maps a Banked Register string to its mask value. The mask value returned is
3397 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3398 // mask operand, which expresses which register is to be used, e.g. r8, and in
3399 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string was invalid.
3401 static inline int getBankedRegisterMask(StringRef RegString) {
3402 return StringSwitch<int>(RegString.lower())
3403 .Case("r8_usr", 0x00)
3404 .Case("r9_usr", 0x01)
3405 .Case("r10_usr", 0x02)
3406 .Case("r11_usr", 0x03)
3407 .Case("r12_usr", 0x04)
3408 .Case("sp_usr", 0x05)
3409 .Case("lr_usr", 0x06)
3410 .Case("r8_fiq", 0x08)
3411 .Case("r9_fiq", 0x09)
3412 .Case("r10_fiq", 0x0a)
3413 .Case("r11_fiq", 0x0b)
3414 .Case("r12_fiq", 0x0c)
3415 .Case("sp_fiq", 0x0d)
3416 .Case("lr_fiq", 0x0e)
3417 .Case("lr_irq", 0x10)
3418 .Case("sp_irq", 0x11)
3419 .Case("lr_svc", 0x12)
3420 .Case("sp_svc", 0x13)
3421 .Case("lr_abt", 0x14)
3422 .Case("sp_abt", 0x15)
3423 .Case("lr_und", 0x16)
3424 .Case("sp_und", 0x17)
3425 .Case("lr_mon", 0x1c)
3426 .Case("sp_mon", 0x1d)
3427 .Case("elr_hyp", 0x1e)
3428 .Case("sp_hyp", 0x1f)
3429 .Case("spsr_fiq", 0x2e)
3430 .Case("spsr_irq", 0x30)
3431 .Case("spsr_svc", 0x32)
3432 .Case("spsr_abt", 0x34)
3433 .Case("spsr_und", 0x36)
3434 .Case("spsr_mon", 0x3c)
3435 .Case("spsr_hyp", 0x3e)
3439 // Maps a MClass special register string to its value for use in the
3440 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3441 // Returns -1 to signify that the string was invalid.
3442 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3443 return StringSwitch<int>(RegString.lower())
3453 .Case("primask", 0x10)
3454 .Case("basepri", 0x11)
3455 .Case("basepri_max", 0x12)
3456 .Case("faultmask", 0x13)
3457 .Case("control", 0x14)
3461 // The flags here are common to those allowed for apsr in the A class cores and
3462 // those allowed for the special registers in the M class cores. Returns a
3463 // value representing which flags were present, -1 if invalid.
3464 static inline int getMClassFlagsMask(StringRef Flags) {
3468 return StringSwitch<int>(Flags)
3471 .Case("nzcvqg", 0x3)
3475 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
3476 const ARMSubtarget *Subtarget) {
3477 // Ensure that the register (without flags) was a valid M Class special
3479 int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
3480 if (SYSmvalue == -1)
3483 // basepri, basepri_max and faultmask are only valid for V7m.
3484 if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
3487 // If it was a read then we won't be expecting flags and so at this point
3488 // we can return the mask.
3490 assert (Flags.empty() && "Unexpected flags for reading M class register.");
3494 // We know we are now handling a write so need to get the mask for the flags.
3495 int Mask = getMClassFlagsMask(Flags);
3497 // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
3498 // shouldn't have flags present.
3499 if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
3502 // The _g and _nzcvqg versions are only valid if the DSP extension is available.
3504 if (!Subtarget->hasThumb2DSP() && (Mask & 0x2))
3507 // The register was valid, so we need to put the mask in the correct place
3508 // (the flags need to be in bits 11-10) and combine it with the SYSm value to
3509 // construct the operand for the instruction node.
3510 if (SYSmvalue < 0x4)
3511 return SYSmvalue | Mask << 10;
3516 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3517 // The mask operand contains the special register (R bit) in bit 4: the R bit
3518 // is 1 if the register is spsr, or 0 for one of cpsr/apsr. Bits 3-0 contain
3519 // the fields to be accessed in the special register, set by the flags
3520 // provided with the register.
3522 if (Reg == "apsr") {
3523 // The flags permitted for apsr are the same flags that are allowed in
3524 // M class registers. We get the flag value and then shift the flags into
3525 // the correct place to combine with the mask.
3526 Mask = getMClassFlagsMask(Flags);
3532 if (Reg != "cpsr" && Reg != "spsr") {
3536 // This is the same as if the flags were "fc"
3537 if (Flags.empty() || Flags == "all")
3540 // Inspect the supplied flags string and set the bits in the mask for
3541 // the relevant and valid flags allowed for cpsr and spsr.
3542 for (char Flag : Flags) {
3561 // This avoids allowing strings where the same flag bit appears twice.
3562 if (!FlagVal || (Mask & FlagVal))
3567 // If the register is spsr then we need to set the R bit.
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N) {
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MRC node (32 bit) or an
    // MRRC node (64 bit); we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops);
  }
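  // Illustrative example (assumed ACLE-style register string, not from the
  // original source): IR such as
  //   %v = call i32 @llvm.read_register.i32(metadata !0)
  //   !0 = !{!"cp15:0:c13:c0:3"}
  // produces five integer operands above and is lowered to a (t2)MRC machine
  // node, while a three-field string selects the 64-bit (t2)MRRC form.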
  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops);
  }
  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2())
      return nullptr;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
      return nullptr;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops);
  }
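  // Illustrative example (not from the original source): for
  //   %fpscr = call i32 @llvm.read_register.i32(metadata !0)
  //   !0 = !{!"fpscr"}
  // the switch above selects ARM::VMRS, so the read becomes a single VMRS
  // machine node, provided the subtarget has VFP2.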
  // If the target is M Class then we need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget);
    if (SYSmValue == -1)
      return nullptr;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops);
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL,
                                  MVT::i32, MVT::Other, Ops);
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
                                  DL, MVT::i32, MVT::Other, Ops);
  }

  return nullptr;
}
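// Illustrative note (not from the original source): on an A or R class core,
// reading "cpsr" or "apsr" therefore becomes an MRS / t2MRS_AR node, while
// "spsr" uses the MRSsys / t2MRSsys_AR form selected just above.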
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N) {
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MCR node (32 bit) or an
    // MCRR node (64 bit); we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
  }
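  // Illustrative example (assumed ACLE-style register string, not from the
  // original source): a 64-bit write such as
  //   call void @llvm.write_register.i64(metadata !0, i64 %val)
  //   !0 = !{!"cp15:0:c2"}
  // supplies the value as two i32 operands (operands 2 and 3 of the node),
  // which are inserted above and lowered to a (t2)MCRR machine node.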
  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops);
  }
  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2())
      return nullptr;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
  }
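  // Illustrative example (not from the original source): writing "fpscr" via
  //   call void @llvm.write_register.i32(metadata !0, i32 %v)
  // selects ARM::VMSR above, so the write becomes a single VMSR machine node
  // when VFP2 is available.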
  SmallVector<StringRef, 5> Fields;
  StringRef(SpecialReg).split(Fields, "_", 1, false);
  std::string Reg = Fields[0].str();
  StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
  // If the target is M Class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    // basepri_max gets split, so we need to correct Reg and Flags.
    if (SpecialReg == "basepri_max") {
      Reg = SpecialReg;
      Flags = "";
    }

    int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
    if (SYSmValue == -1)
      return nullptr;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops);
  }
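  // Illustrative note (based on the code above): "basepri_max" is split at the
  // underscore into Reg == "basepri" and Flags == "max", so it is patched back
  // to the full name here before its SYSm value (0x12) is looked up and a
  // t2MSR_M node is built.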
  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                  DL, MVT::Other, Ops);
  }

  return nullptr;
}
SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N) {
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use the GPRPair reg class for "%r" for 64-bit data. For
  // Thumb, the 64-bit data may be referred to by the H, Q, R modifiers, so we
  // still pack them into a GPRPair.

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr, 0);
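  // Illustrative example (not from the original source): inline asm such as
  //   asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(Val64) : "r"(Ptr));
  // needs its 64-bit "=&r" operand in an even/odd register pair, which is why
  // the loop below rewrites matching two-GPR operands into a single GPRPair.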
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
       ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    } else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);
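    // Illustrative example (not from the original source): an "i" operand,
    // e.g. asm("orr %0, %1, %2" : "=r"(x) : "r"(y), "i"(4)), shows up here as
    // a Kind_Imm flag word followed by a constant, which is why that pair is
    // copied through unchanged by the Kind_Imm handling just above.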
    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();
    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N, 0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    } else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;
    if (PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum */);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return nullptr;

  SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  return New.getNode();
}
bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_i:
    // FIXME: It seems strange that 'i' is needed here since it's supposed to
    //        be an immediate and not a memory constraint.
    // Fallthrough.
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}
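// Illustrative note (not from the original source): a constrained memory
// operand such as asm("ldrex %0, %1" : "=r"(v) : "Q"(*p)) reaches the function
// above with ConstraintID == InlineAsm::Constraint_Q, and the address is
// simply passed through in a register.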
/// createARMISelDag - This pass converts a legalized DAG into a
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}