lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines an instruction selector for the ARM target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "ARM.h"
  15 #include "ARMBaseInstrInfo.h"
  16 #include "ARMTargetMachine.h"
  17 #include "MCTargetDesc/ARMAddressingModes.h"
  18 #include "llvm/ADT/StringSwitch.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineFunction.h"
  21 #include "llvm/CodeGen/MachineInstrBuilder.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/IR/CallingConv.h"
  26 #include "llvm/IR/Constants.h"
  27 #include "llvm/IR/DerivedTypes.h"
  28 #include "llvm/IR/Function.h"
  29 #include "llvm/IR/Intrinsics.h"
  30 #include "llvm/IR/LLVMContext.h"
  31 #include "llvm/Support/CommandLine.h"
  32 #include "llvm/Support/Compiler.h"
  33 #include "llvm/Support/Debug.h"
  34 #include "llvm/Support/ErrorHandling.h"
  35 #include "llvm/Target/TargetLowering.h"
  36 #include "llvm/Target/TargetOptions.h"
  37
  38 using namespace llvm;
  39
  40 #define DEBUG_TYPE "arm-isel"
  41
     // Hidden command-line switch "disable-shifter-op" (default: false). When set,
     // SelectImmShifterOperand and SelectRegShifterOperand refuse to match.
  42 static cl::opt<bool>
  43 DisableShifterOp("disable-shifter-op", cl::Hidden,
  44   cl::desc("Disable isel of shifter-op"),
  45   cl::init(false));
  46
     // Hidden command-line switch "check-vmlx-hazard" (default: true). When
     // cleared, hasNoVMLxHazardUse returns true without inspecting the node's use.
  47 static cl::opt<bool>
  48 CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
  49   cl::desc("Check fp vmla / vmls hazard at isel time"),
  50   cl::init(true));
  51
  52 //===--------------------------------------------------------------------===//
  53 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  54 /// instructions for SelectionDAG operations.
  55 ///
  56 namespace {
  57
     /// Which addressing-mode-2 form SelectAddrMode2Worker matched.
  58 enum AddrMode2Type {
  59   AM2_BASE, // Simple AM2 (+-imm12)
  60   AM2_SHOP  // Shifter-op AM2
  61 };
  62
     /// SelectionDAG instruction selector for ARM. Combines the hand-written
     /// selection helpers declared below with the matcher generated from the
     /// target description (ARMGenDAGISel.inc).
  63 class ARMDAGToDAGISel : public SelectionDAGISel {
  64   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  65   /// make the right decision when generating code for different targets.
       /// The constructor does not initialise it; it is assigned at the start of
       /// every runOnMachineFunction call.
  66   const ARMSubtarget *Subtarget;
  67
  68 public:
  69   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  70       : SelectionDAGISel(tm, OptLevel) {}
  71
       /// Cache the subtarget of \p MF, then run the generic SelectionDAG
       /// selection. The base-class result is discarded; always returns true.
  72   bool runOnMachineFunction(MachineFunction &MF) override {
  73     // Reset the subtarget each time through.
  74     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  75     SelectionDAGISel::runOnMachineFunction(MF);
  76     return true;
  77   }
  78
  79   const char *getPassName() const override {
  80     return "ARM Instruction Selection";
  81   }
  82
  83   void PreprocessISelDAG() override;
  84
  85   /// getI32Imm - Return a target constant of type i32 with the specified
  86   /// value.
  87   inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
  88     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  89   }
  90
  91   SDNode *Select(SDNode *N) override;
  92
  93
       // Shifter-operand selection and its profitability heuristics.
  94   bool hasNoVMLxHazardUse(SDNode *N) const;
  95   bool isShifterOpProfitable(const SDValue &Shift,
  96                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  97   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  98                                SDValue &B, SDValue &C,
  99                                bool CheckProfitability = true);
 100   bool SelectImmShifterOperand(SDValue N, SDValue &A,
 101                                SDValue &B, bool CheckProfitability = true);
       /// Same as SelectRegShifterOperand with CheckProfitability forced to false.
 102   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
 103                                     SDValue &B, SDValue &C) {
 104     // Don't apply the profitability check
 105     return SelectRegShifterOperand(N, A, B, C, false);
 106   }
       /// Same as SelectImmShifterOperand with CheckProfitability forced to false.
 107   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
 108                                     SDValue &B) {
 109     // Don't apply the profitability check
 110     return SelectImmShifterOperand(N, A, B, false);
 111   }
 112
 113   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 114   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 115
       /// Shared matcher behind the three SelectAddrMode2* wrappers below; the
       /// return value reports which form was produced.
 116   AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
 117                                       SDValue &Offset, SDValue &Opc);
       /// True only when the worker produced the AM2_BASE form.
 118   bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
 119                            SDValue &Opc) {
 120     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
 121   }
 122
       /// True only when the worker produced the AM2_SHOP form.
 123   bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
 124                            SDValue &Opc) {
 125     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
 126   }
 127
       /// Accepts whichever form the worker produced; the worker's outputs are
       /// kept and its return value is ignored.
 128   bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
 129                        SDValue &Opc) {
 130     SelectAddrMode2Worker(N, Base, Offset, Opc);
 131 //    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
 132     // This always matches one way or another.
 133     return true;
 134   }
 135
       /// Turn the constant \p N into a predicate operand pair: \p Pred receives
       /// the constant's value as an i32 target constant and \p Reg the CPSR
       /// register. \p N must be a ConstantSDNode (it is cast<> unconditionally).
 136   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 137     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 138     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 139     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 140     return true;
 141   }
 142
       // Remaining ARM-mode addressing-mode selectors.
 143   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 144                              SDValue &Offset, SDValue &Opc);
 145   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 146                              SDValue &Offset, SDValue &Opc);
 147   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 148                              SDValue &Offset, SDValue &Opc);
 149   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 150   bool SelectAddrMode3(SDValue N, SDValue &Base,
 151                        SDValue &Offset, SDValue &Opc);
 152   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 153                              SDValue &Offset, SDValue &Opc);
 154   bool SelectAddrMode5(SDValue N, SDValue &Base,
 155                        SDValue &Offset);
 156   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 157   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 158
 159   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 160
 161   // Thumb Addressing Modes:
 162   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 163   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 164                                 SDValue &OffImm);
 165   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 166                                  SDValue &OffImm);
 167   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 168                                  SDValue &OffImm);
 169   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 170                                  SDValue &OffImm);
 171   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 172
 173   // Thumb 2 Addressing Modes:
 174   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 175   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 176                             SDValue &OffImm);
 177   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 178                                  SDValue &OffImm);
 179   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 180                              SDValue &OffReg, SDValue &ShImm);
 181   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 182
       // Immediate predicates: each is true when the corresponding ARM_AM encoder
       // accepts the value (returns something other than -1). The *_not variants
       // test the bitwise complement of Imm.
 183   inline bool is_so_imm(unsigned Imm) const {
 184     return ARM_AM::getSOImmVal(Imm) != -1;
 185   }
 186
 187   inline bool is_so_imm_not(unsigned Imm) const {
 188     return ARM_AM::getSOImmVal(~Imm) != -1;
 189   }
 190
 191   inline bool is_t2_so_imm(unsigned Imm) const {
 192     return ARM_AM::getT2SOImmVal(Imm) != -1;
 193   }
 194
 195   inline bool is_t2_so_imm_not(unsigned Imm) const {
 196     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 197   }
 198
 199   // Include the pieces autogenerated from the target description.
 200 #include "ARMGenDAGISel.inc"
 201
 202 private:
 203   /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
 204   /// ARM.
 205   SDNode *SelectARMIndexedLoad(SDNode *N);
 206   SDNode *SelectT2IndexedLoad(SDNode *N);
 207
 208   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 209   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 210   /// loads of D registers and even subregs and odd subregs of Q registers.
 211   /// For NumVecs <= 2, QOpcodes1 is not used.
 212   SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 213                     const uint16_t *DOpcodes,
 214                     const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
 215
 216   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 217   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 218   /// stores of D registers and even subregs and odd subregs of Q registers.
 219   /// For NumVecs <= 2, QOpcodes1 is not used.
 220   SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 221                     const uint16_t *DOpcodes,
 222                     const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
 223
 224   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 225   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 226   /// load/store of D registers and Q registers.
 227   SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
 228                           bool isUpdating, unsigned NumVecs,
 229                           const uint16_t *DOpcodes, const uint16_t *QOpcodes);
 230
 231   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 232   /// should be 2, 3 or 4.  The opcode array specifies the instructions used
 233   /// for loading D registers.  (Q registers are not supported.)
 234   SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
 235                        const uint16_t *Opcodes);
 236
 237   /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
 238   /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
 239   /// generated to force the table registers to be consecutive.
 240   SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
 241
 242   /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
 243   SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 244
 245   // Select special operations if node forms integer ABS pattern
 246   SDNode *SelectABSOp(SDNode *N);
 247
 248   SDNode *SelectReadRegister(SDNode *N);
 249   SDNode *SelectWriteRegister(SDNode *N);
 250
 251   SDNode *SelectInlineAsm(SDNode *N);
 252
 253   SDNode *SelectConcatVector(SDNode *N);
 254
 255   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 256   /// inline asm expressions.
 257   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 258                                     std::vector<SDValue> &OutOps) override;
 259
 260   // Form pairs of consecutive R, S, D, or Q registers.
 261   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 262   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 263   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 264   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 265
 266   // Form sequences of 4 consecutive S, D, or Q registers.
 267   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 268   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 269   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 270
 271   // Get the alignment operand for a NEON VLD or VST instruction.
 272   SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
 273                         bool is64BitVector);
 274
 275   /// Returns the number of instructions required to materialize the given
 276   /// constant in a register, or 3 if a literal pool load is needed.
 277   unsigned ConstantMaterializationCost(unsigned Val) const;
 278
 279   /// Checks if N is a multiplication by a constant where we can extract out a
 280   /// power of two from the constant so that it can be used in a shift, but only
 281   /// if it simplifies the materialization of the constant. Returns true if it
 282   /// is, and assigns to PowerOfTwo the power of two that should be extracted
 283   /// out and to NewMulConst the new constant to be multiplied by.
       /// PowerOfTwo is the shift amount (the exponent), capped at MaxShift.
 284   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
 285                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
 286
 287   /// Replace N with M in CurDAG, in a way that also ensures that M gets
 288   /// selected when N would have been selected.
 289   void replaceDAGValue(const SDValue &N, SDValue M);
 290 };
 291 }
 292
 293 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 294 /// operand. If so Imm will receive the 32-bit value.
 295 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 296   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 297     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 298     return true;
 299   }
 300   return false;
 301 }
 302
 303 // isInt32Immediate - This method tests to see if a constant operand.
 304 // If so Imm will receive the 32 bit value.
 305 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 306   return isInt32Immediate(N.getNode(), Imm);
 307 }
 308
 309 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 310 // opcode and that it has a immediate integer right operand.
 311 // If so Imm will receive the 32 bit value.
 312 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 313   return N->getOpcode() == Opc &&
 314          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 315 }
 316
 317 /// \brief Check whether a particular node is a constant value representable as
 318 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 319 ///
 320 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 321 static bool isScaledConstantInRange(SDValue Node, int Scale,
 322                                     int RangeMin, int RangeMax,
 323                                     int &ScaledConstant) {
 324   assert(Scale > 0 && "Invalid scale!");
 325
 326   // Check that this is a constant.
 327   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 328   if (!C)
 329     return false;
 330
 331   ScaledConstant = (int) C->getZExtValue();
 332   if ((ScaledConstant % Scale) != 0)
 333     return false;
 334
 335   ScaledConstant /= Scale;
 336   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 337 }
 338
     /// Pre-selection DAG rewrite; does nothing unless the subtarget has V6T2 ops.
     ///
     /// Walks every node and, for each ADD with an operand of the form
     /// (and (srl X, c1), c2) where c2 is a run of set bits followed by one or
     /// two trailing zeros, re-expresses that operand as an explicit left shift of
     /// a narrower mask-and-shift. The ADD is updated in place via
     /// UpdateNodeOperands.
 339 void ARMDAGToDAGISel::PreprocessISelDAG() {
 340   if (!Subtarget->hasV6T2Ops())
 341     return;
 342
       // NOTE(review): named isThumb2 but queries isThumb(); presumably the
       // hasV6T2Ops() guard above makes the two equivalent here - confirm.
 343   bool isThumb2 = Subtarget->isThumb();
 344   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 345        E = CurDAG->allnodes_end(); I != E; ) {
 346     SDNode *N = I++;  // Advance the iterator before using N to avoid invalidation issues.
 347
 348     if (N->getOpcode() != ISD::ADD)
 349       continue;
 350
 351     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 352     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 353     // trailing zeros, e.g. 1020.
 354     // Transform the expression to
 355     // (add X1, (shl (and (srl X2, c1+tz), (c2>>tz)), tz)) where tz is the
 356     // number of trailing zeros of c2. The left shift would be folded as an
 357     // shifter operand of 'add' and the 'and' and 'srl' would become a bits
 358     // extraction node (UBFX).
 359
 360     SDValue N0 = N->getOperand(0);
 361     SDValue N1 = N->getOperand(1);
 362     unsigned And_imm = 0;
         // Arrange for N1 to be the AND-with-immediate operand, swapping if it was
         // found on the left. And_imm stays 0 when neither operand qualifies.
 363     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 364       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 365         std::swap(N0, N1);
 366     }
 367     if (!And_imm)
 368       continue;
 369
 370     // Check if the AND mask is an immediate of the form: 000.....1111111100
 371     unsigned TZ = countTrailingZeros(And_imm);
 372     if (TZ != 1 && TZ != 2)
 373       // Be conservative here. Shifter operands aren't always free. e.g. On
 374       // Swift, left shifter operand of 1 / 2 for free but others are not.
 375       // e.g.
 376       //  ubfx   r3, r1, #16, #8
 377       //  ldr.w  r3, [r0, r3, lsl #2]
 378       // vs.
 379       //  mov.w  r9, #1020
 380       //  and.w  r2, r9, r1, lsr #14
 381       //  ldr    r2, [r0, r2]
 382       continue;
 383     And_imm >>= TZ;
         // After dropping the trailing zeros the mask must be a contiguous run of
         // low set bits (2^k - 1); x & (x + 1) is zero exactly for such values.
 384     if (And_imm & (And_imm + 1))
 385       continue;
 386
 387     // Look for (and (srl X, c1), c2).
 388     SDValue Srl = N1.getOperand(0);
 389     unsigned Srl_imm = 0;
         // Shift amounts of 2 or less are left alone.
 390     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 391         (Srl_imm <= 2))
 392       continue;
 393
 394     // Make sure first operand is not a shifter operand which would prevent
 395     // folding of the left shift.
 396     SDValue CPTmp0;
 397     SDValue CPTmp1;
 398     SDValue CPTmp2;
 399     if (isThumb2) {
 400       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
 401         continue;
 402     } else {
 403       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 404           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 405         continue;
 406     }
 407
 408     // Now make the transformation.
         // Build srl X, (c1 + tz) ...
 409     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 410                           Srl.getOperand(0),
 411                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 412                                               MVT::i32));
         // ... masked with (c2 >> tz) ...
 413     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 414                          Srl,
 415                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
         // ... and shifted back left by tz.
 416     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 417                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 418     CurDAG->UpdateNodeOperands(N, N0, N1);
 419   }
 420 }
 421
 422 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 423 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 424 /// least on current ARM implementations) which should be avoidded.
 425 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 426   if (OptLevel == CodeGenOpt::None)
 427     return true;
 428
 429   if (!CheckVMLxHazard)
 430     return true;
 431
 432   if (!Subtarget->isCortexA7() && !Subtarget->isCortexA8() &&
 433       !Subtarget->isCortexA9() && !Subtarget->isSwift())
 434     return true;
 435
 436   if (!N->hasOneUse())
 437     return false;
 438
 439   SDNode *Use = *N->use_begin();
 440   if (Use->getOpcode() == ISD::CopyToReg)
 441     return true;
 442   if (Use->isMachineOpcode()) {
 443     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 444         CurDAG->getSubtarget().getInstrInfo());
 445
 446     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 447     if (MCID.mayStore())
 448       return true;
 449     unsigned Opcode = MCID.getOpcode();
 450     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 451       return true;
 452     // vmlx feeding into another vmlx. We actually want to unfold
 453     // the use later in the MLxExpansion pass. e.g.
 454     // vmla
 455     // vmla (stall 8 cycles)
 456     //
 457     // vmul (5 cycles)
 458     // vadd (5 cycles)
 459     // vmla
 460     // This adds up to about 18 - 19 cycles.
 461     //
 462     // vmla
 463     // vmul (stall 4 cycles)
 464     // vadd adds up to about 14 cycles.
 465     return TII->isFpMLxInstruction(Opcode);
 466   }
 467
 468   return false;
 469 }
 470
 471 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 472                                             ARM_AM::ShiftOpc ShOpcVal,
 473                                             unsigned ShAmt) {
 474   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 475     return true;
 476   if (Shift.hasOneUse())
 477     return true;
 478   // R << 2 is free.
 479   return ShOpcVal == ARM_AM::lsl &&
 480          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 481 }
 482
 483 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
 484   if (Subtarget->isThumb()) {
 485     if (Val <= 255) return 1;                               // MOV
 486     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 487     if (~Val <= 255) return 2;                              // MOV + MVN
 488     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
 489   } else {
 490     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
 491     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
 492     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 493     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
 494   }
 495   if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
 496   return 3; // Literal pool load
 497 }
 498
 499 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
 500                                              unsigned MaxShift,
 501                                              unsigned &PowerOfTwo,
 502                                              SDValue &NewMulConst) const {
 503   assert(N.getOpcode() == ISD::MUL);
 504   assert(MaxShift > 0);
 505
 506   // If the multiply is used in more than one place then changing the constant
 507   // will make other uses incorrect, so don't.
 508   if (!N.hasOneUse()) return false;
 509   // Check if the multiply is by a constant
 510   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
 511   if (!MulConst) return false;
 512   // If the constant is used in more than one place then modifying it will mean
 513   // we need to materialize two constants instead of one, which is a bad idea.
 514   if (!MulConst->hasOneUse()) return false;
 515   unsigned MulConstVal = MulConst->getZExtValue();
 516   if (MulConstVal == 0) return false;
 517
 518   // Find the largest power of 2 that MulConstVal is a multiple of
 519   PowerOfTwo = MaxShift;
 520   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
 521     --PowerOfTwo;
 522     if (PowerOfTwo == 0) return false;
 523   }
 524
 525   // Only optimise if the new cost is better
 526   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
 527   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
 528   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
 529   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
 530   return NewCost < OldCost;
 531 }
 532
 533 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
 534   CurDAG->RepositionNode(N.getNode(), M.getNode());
 535   CurDAG->ReplaceAllUsesWith(N, M);
 536 }
 537
 538 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 539                                               SDValue &BaseReg,
 540                                               SDValue &Opc,
 541                                               bool CheckProfitability) {
 542   if (DisableShifterOp)
 543     return false;
 544
 545   // If N is a multiply-by-constant and it's profitable to extract a shift and
 546   // use it in a shifted operand do so.
 547   if (N.getOpcode() == ISD::MUL) {
 548     unsigned PowerOfTwo = 0;
 549     SDValue NewMulConst;
 550     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
 551       replaceDAGValue(N.getOperand(1), NewMulConst);
 552       BaseReg = N;
 553       Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
 554                                                           PowerOfTwo),
 555                                       SDLoc(N), MVT::i32);
 556       return true;
 557     }
 558   }
 559
 560   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 561
 562   // Don't match base register only case. That is matched to a separate
 563   // lower complexity pattern with explicit register operand.
 564   if (ShOpcVal == ARM_AM::no_shift) return false;
 565
 566   BaseReg = N.getOperand(0);
 567   unsigned ShImmVal = 0;
 568   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 569   if (!RHS) return false;
 570   ShImmVal = RHS->getZExtValue() & 31;
 571   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 572                                   SDLoc(N), MVT::i32);
 573   return true;
 574 }
 575
 576 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 577                                               SDValue &BaseReg,
 578                                               SDValue &ShReg,
 579                                               SDValue &Opc,
 580                                               bool CheckProfitability) {
 581   if (DisableShifterOp)
 582     return false;
 583
 584   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 585
 586   // Don't match base register only case. That is matched to a separate
 587   // lower complexity pattern with explicit register operand.
 588   if (ShOpcVal == ARM_AM::no_shift) return false;
 589
 590   BaseReg = N.getOperand(0);
 591   unsigned ShImmVal = 0;
 592   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 593   if (RHS) return false;
 594
 595   ShReg = N.getOperand(1);
 596   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 597     return false;
 598   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 599                                   SDLoc(N), MVT::i32);
 600   return true;
 601 }
 602
 603
 604 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 605                                           SDValue &Base,
 606                                           SDValue &OffImm) {
 607   // Match simple R + imm12 operands.
 608
 609   // Base only.
 610   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 611       !CurDAG->isBaseWithConstantOffset(N)) {
 612     if (N.getOpcode() == ISD::FrameIndex) {
 613       // Match frame index.
 614       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 615       Base = CurDAG->getTargetFrameIndex(
 616           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 617       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 618       return true;
 619     }
 620
 621     if (N.getOpcode() == ARMISD::Wrapper &&
 622         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
 623       Base = N.getOperand(0);
 624     } else
 625       Base = N;
 626     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 627     return true;
 628   }
 629
 630   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 631     int RHSC = (int)RHS->getSExtValue();
 632     if (N.getOpcode() == ISD::SUB)
 633       RHSC = -RHSC;
 634
 635     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 636       Base   = N.getOperand(0);
 637       if (Base.getOpcode() == ISD::FrameIndex) {
 638         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 639         Base = CurDAG->getTargetFrameIndex(
 640             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 641       }
 642       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 643       return true;
 644     }
 645   }
 646
 647   // Base only.
 648   Base = N;
 649   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 650   return true;
 651 }
 652
 653
 654
     /// Match \p N as a load/store address of the form
     /// base register +/- (optionally shifted) offset register.
     ///
     /// \param N      Address value.
     /// \param Base   [out] Base register.
     /// \param Offset [out] Offset register (the value being shifted, when a shift
     ///               is folded).
     /// \param Opc    [out] Encoded add/sub flag, shift amount and shift kind.
     /// \returns false when \p N is not an add/sub-like node, or when it is an
     /// ADD/OR with a constant right operand in the 12-bit range (left for the
     /// imm12 form); true otherwise. May rewrite a multiply constant in the DAG
     /// through replaceDAGValue.
 655 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 656                                       SDValue &Opc) {
       // Multiply by an odd constant: on A9-like and Swift subtargets this is only
       // attempted when the multiply has a single use.
 657   if (N.getOpcode() == ISD::MUL &&
 658       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 659     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 660       // X * [3,5,9] -> X + X * [2,4,8] etc.
 661       int RHSC = (int)RHS->getZExtValue();
 662       if (RHSC & 1) {
             // Clear the low bit; what remains must be a power of two, which
             // becomes the shift applied to the offset copy of X.
 663         RHSC = RHSC & ~1;
 664         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 665         if (RHSC < 0) {
 666           AddSub = ARM_AM::sub;
 667           RHSC = - RHSC;
 668         }
 669         if (isPowerOf2_32(RHSC)) {
 670           unsigned ShAmt = Log2_32(RHSC);
 671           Base = Offset = N.getOperand(0);
 672           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 673                                                             ARM_AM::lsl),
 674                                           SDLoc(N), MVT::i32);
 675           return true;
 676         }
 677       }
 678     }
 679   }
 680
 681   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 682       // ISD::OR that is equivalent to an ISD::ADD.
 683       !CurDAG->isBaseWithConstantOffset(N))
 684     return false;
 685
 686   // Leave simple R +/- imm12 operands for LDRi12
       // NOTE(review): only ADD and OR are filtered here; a SUB with a small
       // constant falls through to the register form - confirm this is intended.
 687   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 688     int RHSC;
 689     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 690                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 691       return false;
 692   }
 693
 694   // Otherwise this is R +/- [possibly shifted] R.
 695   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 696   ARM_AM::ShiftOpc ShOpcVal =
 697     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 698   unsigned ShAmt = 0;
 699
       // Default assignment; refined below if a shift can be folded.
 700   Base   = N.getOperand(0);
 701   Offset = N.getOperand(1);
 702
 703   if (ShOpcVal != ARM_AM::no_shift) {
 704     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 705     // it.
 706     if (ConstantSDNode *Sh =
 707            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 708       ShAmt = Sh->getZExtValue();
 709       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 710         Offset = N.getOperand(1).getOperand(0);
 711       else {
             // Not worth folding: keep the shift node itself as the offset.
 712         ShAmt = 0;
 713         ShOpcVal = ARM_AM::no_shift;
 714       }
 715     } else {
 716       ShOpcVal = ARM_AM::no_shift;
 717     }
 718   }
 719
 720   // Try matching (R shl C) + (R).
       // NOTE(review): hasOneUse() sits inside the negation, so this path is taken
       // only when the left operand does NOT have exactly one use (and the
       // subtarget is neither A9-like nor Swift) - confirm this is intended.
 721   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 722       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 723         N.getOperand(0).hasOneUse())) {
 724     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 725     if (ShOpcVal != ARM_AM::no_shift) {
 726       // Check to see if the RHS of the shift is a constant, if not, we can't
 727       // fold it.
 728       if (ConstantSDNode *Sh =
 729           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 730         ShAmt = Sh->getZExtValue();
 731         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
               // The shift is on the left, so the operands trade roles.
 732           Offset = N.getOperand(0).getOperand(0);
 733           Base = N.getOperand(1);
 734         } else {
 735           ShAmt = 0;
 736           ShOpcVal = ARM_AM::no_shift;
 737         }
 738       } else {
 739         ShOpcVal = ARM_AM::no_shift;
 740       }
 741     }
 742   }
 743
 744   // If Offset is a multiply-by-constant and it's profitable to extract a shift
 745   // and use it in a shifted operand do so.
 746   if (Offset.getOpcode() == ISD::MUL) {
 747     unsigned PowerOfTwo = 0;
 748     SDValue NewMulConst;
 749     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
           // Offset stays the multiply node; its constant is swapped for the
           // reduced one and the extracted factor becomes an lsl.
 750       replaceDAGValue(Offset.getOperand(1), NewMulConst);
 751       ShAmt = PowerOfTwo;
 752       ShOpcVal = ARM_AM::lsl;
 753     }
 754   }
 755
 756   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 757                                   SDLoc(N), MVT::i32);
 758   return true;
 759 }
 760
 761
 762 //-----
 763
 764 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
 765                                                      SDValue &Base,
 766                                                      SDValue &Offset,
 767                                                      SDValue &Opc) {
 768   if (N.getOpcode() == ISD::MUL &&
 769       (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
 770     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 771       // X * [3,5,9] -> X + X * [2,4,8] etc.
 772       int RHSC = (int)RHS->getZExtValue();
 773       if (RHSC & 1) {
 774         RHSC = RHSC & ~1;
 775         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 776         if (RHSC < 0) {
 777           AddSub = ARM_AM::sub;
 778           RHSC = - RHSC;
 779         }
 780         if (isPowerOf2_32(RHSC)) {
 781           unsigned ShAmt = Log2_32(RHSC);
 782           Base = Offset = N.getOperand(0);
 783           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 784                                                             ARM_AM::lsl),
 785                                           SDLoc(N), MVT::i32);
 786           return AM2_SHOP;
 787         }
 788       }
 789     }
 790   }
 791
 792   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 793       // ISD::OR that is equivalent to an ADD.
 794       !CurDAG->isBaseWithConstantOffset(N)) {
 795     Base = N;
 796     if (N.getOpcode() == ISD::FrameIndex) {
 797       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 798       Base = CurDAG->getTargetFrameIndex(
 799           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 800     } else if (N.getOpcode() == ARMISD::Wrapper &&
 801                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
 802       Base = N.getOperand(0);
 803     }
 804     Offset = CurDAG->getRegister(0, MVT::i32);
 805     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 806                                                       ARM_AM::no_shift),
 807                                     SDLoc(N), MVT::i32);
 808     return AM2_BASE;
 809   }
 810
 811   // Match simple R +/- imm12 operands.
 812   if (N.getOpcode() != ISD::SUB) {
 813     int RHSC;
 814     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 815                                 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
 816       Base = N.getOperand(0);
 817       if (Base.getOpcode() == ISD::FrameIndex) {
 818         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 819         Base = CurDAG->getTargetFrameIndex(
 820             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 821       }
 822       Offset = CurDAG->getRegister(0, MVT::i32);
 823
 824       ARM_AM::AddrOpc AddSub = ARM_AM::add;
 825       if (RHSC < 0) {
 826         AddSub = ARM_AM::sub;
 827         RHSC = - RHSC;
 828       }
 829       Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
 830                                                         ARM_AM::no_shift),
 831                                       SDLoc(N), MVT::i32);
 832       return AM2_BASE;
 833     }
 834   }
 835
 836   if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
 837     // Compute R +/- (R << N) and reuse it.
 838     Base = N;
 839     Offset = CurDAG->getRegister(0, MVT::i32);
 840     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 841                                                       ARM_AM::no_shift),
 842                                     SDLoc(N), MVT::i32);
 843     return AM2_BASE;
 844   }
 845
 846   // Otherwise this is R +/- [possibly shifted] R.
 847   ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
 848   ARM_AM::ShiftOpc ShOpcVal =
 849     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 850   unsigned ShAmt = 0;
 851
 852   Base   = N.getOperand(0);
 853   Offset = N.getOperand(1);
 854
 855   if (ShOpcVal != ARM_AM::no_shift) {
 856     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 857     // it.
 858     if (ConstantSDNode *Sh =
 859            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 860       ShAmt = Sh->getZExtValue();
 861       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 862         Offset = N.getOperand(1).getOperand(0);
 863       else {
 864         ShAmt = 0;
 865         ShOpcVal = ARM_AM::no_shift;
 866       }
 867     } else {
 868       ShOpcVal = ARM_AM::no_shift;
 869     }
 870   }
 871
 872   // Try matching (R shl C) + (R).
 873   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 874       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 875         N.getOperand(0).hasOneUse())) {
 876     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 877     if (ShOpcVal != ARM_AM::no_shift) {
 878       // Check to see if the RHS of the shift is a constant, if not, we can't
 879       // fold it.
 880       if (ConstantSDNode *Sh =
 881           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 882         ShAmt = Sh->getZExtValue();
 883         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 884           Offset = N.getOperand(0).getOperand(0);
 885           Base = N.getOperand(1);
 886         } else {
 887           ShAmt = 0;
 888           ShOpcVal = ARM_AM::no_shift;
 889         }
 890       } else {
 891         ShOpcVal = ARM_AM::no_shift;
 892       }
 893     }
 894   }
 895
 896   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 897                                   SDLoc(N), MVT::i32);
 898   return AM2_SHOP;
 899 }
 900
 901 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 902                                             SDValue &Offset, SDValue &Opc) {
 903   unsigned Opcode = Op->getOpcode();
 904   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 905     ? cast<LoadSDNode>(Op)->getAddressingMode()
 906     : cast<StoreSDNode>(Op)->getAddressingMode();
 907   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 908     ? ARM_AM::add : ARM_AM::sub;
 909   int Val;
 910   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 911     return false;
 912
 913   Offset = N;
 914   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 915   unsigned ShAmt = 0;
 916   if (ShOpcVal != ARM_AM::no_shift) {
 917     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 918     // it.
 919     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 920       ShAmt = Sh->getZExtValue();
 921       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 922         Offset = N.getOperand(0);
 923       else {
 924         ShAmt = 0;
 925         ShOpcVal = ARM_AM::no_shift;
 926       }
 927     } else {
 928       ShOpcVal = ARM_AM::no_shift;
 929     }
 930   }
 931
 932   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 933                                   SDLoc(N), MVT::i32);
 934   return true;
 935 }
 936
 937 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 938                                             SDValue &Offset, SDValue &Opc) {
 939   unsigned Opcode = Op->getOpcode();
 940   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 941     ? cast<LoadSDNode>(Op)->getAddressingMode()
 942     : cast<StoreSDNode>(Op)->getAddressingMode();
 943   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 944     ? ARM_AM::add : ARM_AM::sub;
 945   int Val;
 946   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 947     if (AddSub == ARM_AM::sub) Val *= -1;
 948     Offset = CurDAG->getRegister(0, MVT::i32);
 949     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 950     return true;
 951   }
 952
 953   return false;
 954 }
 955
 956
 957 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 958                                             SDValue &Offset, SDValue &Opc) {
 959   unsigned Opcode = Op->getOpcode();
 960   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 961     ? cast<LoadSDNode>(Op)->getAddressingMode()
 962     : cast<StoreSDNode>(Op)->getAddressingMode();
 963   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 964     ? ARM_AM::add : ARM_AM::sub;
 965   int Val;
 966   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 967     Offset = CurDAG->getRegister(0, MVT::i32);
 968     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 969                                                       ARM_AM::no_shift),
 970                                     SDLoc(Op), MVT::i32);
 971     return true;
 972   }
 973
 974   return false;
 975 }
 976
/// Match an address with no offset: \p N is used unchanged as \p Base.
/// Always succeeds.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
 981
 982 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 983                                       SDValue &Base, SDValue &Offset,
 984                                       SDValue &Opc) {
 985   if (N.getOpcode() == ISD::SUB) {
 986     // X - C  is canonicalize to X + -C, no need to handle it here.
 987     Base = N.getOperand(0);
 988     Offset = N.getOperand(1);
 989     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 990                                     MVT::i32);
 991     return true;
 992   }
 993
 994   if (!CurDAG->isBaseWithConstantOffset(N)) {
 995     Base = N;
 996     if (N.getOpcode() == ISD::FrameIndex) {
 997       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 998       Base = CurDAG->getTargetFrameIndex(
 999           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1000     }
1001     Offset = CurDAG->getRegister(0, MVT::i32);
1002     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1003                                     MVT::i32);
1004     return true;
1005   }
1006
1007   // If the RHS is +/- imm8, fold into addr mode.
1008   int RHSC;
1009   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
1010                               -256 + 1, 256, RHSC)) { // 8 bits.
1011     Base = N.getOperand(0);
1012     if (Base.getOpcode() == ISD::FrameIndex) {
1013       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1014       Base = CurDAG->getTargetFrameIndex(
1015           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1016     }
1017     Offset = CurDAG->getRegister(0, MVT::i32);
1018
1019     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1020     if (RHSC < 0) {
1021       AddSub = ARM_AM::sub;
1022       RHSC = -RHSC;
1023     }
1024     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
1025                                     MVT::i32);
1026     return true;
1027   }
1028
1029   Base = N.getOperand(0);
1030   Offset = N.getOperand(1);
1031   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1032                                   MVT::i32);
1033   return true;
1034 }
1035
1036 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
1037                                             SDValue &Offset, SDValue &Opc) {
1038   unsigned Opcode = Op->getOpcode();
1039   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1040     ? cast<LoadSDNode>(Op)->getAddressingMode()
1041     : cast<StoreSDNode>(Op)->getAddressingMode();
1042   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
1043     ? ARM_AM::add : ARM_AM::sub;
1044   int Val;
1045   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
1046     Offset = CurDAG->getRegister(0, MVT::i32);
1047     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
1048                                     MVT::i32);
1049     return true;
1050   }
1051
1052   Offset = N;
1053   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
1054                                   MVT::i32);
1055   return true;
1056 }
1057
1058 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1059                                       SDValue &Base, SDValue &Offset) {
1060   if (!CurDAG->isBaseWithConstantOffset(N)) {
1061     Base = N;
1062     if (N.getOpcode() == ISD::FrameIndex) {
1063       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1064       Base = CurDAG->getTargetFrameIndex(
1065           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1066     } else if (N.getOpcode() == ARMISD::Wrapper &&
1067                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
1068       Base = N.getOperand(0);
1069     }
1070     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1071                                        SDLoc(N), MVT::i32);
1072     return true;
1073   }
1074
1075   // If the RHS is +/- imm8, fold into addr mode.
1076   int RHSC;
1077   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
1078                               -256 + 1, 256, RHSC)) {
1079     Base = N.getOperand(0);
1080     if (Base.getOpcode() == ISD::FrameIndex) {
1081       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1082       Base = CurDAG->getTargetFrameIndex(
1083           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1084     }
1085
1086     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1087     if (RHSC < 0) {
1088       AddSub = ARM_AM::sub;
1089       RHSC = -RHSC;
1090     }
1091     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1092                                        SDLoc(N), MVT::i32);
1093     return true;
1094   }
1095
1096   Base = N;
1097   Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1098                                      SDLoc(N), MVT::i32);
1099   return true;
1100 }
1101
1102 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1103                                       SDValue &Align) {
1104   Addr = N;
1105
1106   unsigned Alignment = 0;
1107
1108   MemSDNode *MemN = cast<MemSDNode>(Parent);
1109
1110   if (isa<LSBaseSDNode>(MemN) ||
1111       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1112         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1113        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1114     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1115     // The maximum alignment is equal to the memory size being referenced.
1116     unsigned MMOAlign = MemN->getAlignment();
1117     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1118     if (MMOAlign >= MemSize && MemSize > 1)
1119       Alignment = MemSize;
1120   } else {
1121     // All other uses of addrmode6 are for intrinsics.  For now just record
1122     // the raw alignment value; it will be refined later based on the legal
1123     // alignment operands for the intrinsic.
1124     Alignment = MemN->getAlignment();
1125   }
1126
1127   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1128   return true;
1129 }
1130
1131 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1132                                             SDValue &Offset) {
1133   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1134   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1135   if (AM != ISD::POST_INC)
1136     return false;
1137   Offset = N;
1138   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1139     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1140       Offset = CurDAG->getRegister(0, MVT::i32);
1141   }
1142   return true;
1143 }
1144
1145 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1146                                        SDValue &Offset, SDValue &Label) {
1147   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1148     Offset = N.getOperand(0);
1149     SDValue N1 = N.getOperand(1);
1150     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1151                                       SDLoc(N), MVT::i32);
1152     return true;
1153   }
1154
1155   return false;
1156 }
1157
1158
1159 //===----------------------------------------------------------------------===//
1160 //                         Thumb Addressing Modes
1161 //===----------------------------------------------------------------------===//
1162
1163 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1164                                             SDValue &Base, SDValue &Offset){
1165   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1166     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1167     if (!NC || !NC->isNullValue())
1168       return false;
1169
1170     Base = Offset = N;
1171     return true;
1172   }
1173
1174   Base = N.getOperand(0);
1175   Offset = N.getOperand(1);
1176   return true;
1177 }
1178
1179 bool
1180 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1181                                           SDValue &Base, SDValue &OffImm) {
1182   if (!CurDAG->isBaseWithConstantOffset(N)) {
1183     if (N.getOpcode() == ISD::ADD) {
1184       return false; // We want to select register offset instead
1185     } else if (N.getOpcode() == ARMISD::Wrapper &&
1186                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
1187       Base = N.getOperand(0);
1188     } else {
1189       Base = N;
1190     }
1191
1192     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1193     return true;
1194   }
1195
1196   // If the RHS is + imm5 * scale, fold into addr mode.
1197   int RHSC;
1198   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1199     Base = N.getOperand(0);
1200     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1201     return true;
1202   }
1203
1204   // Offset is too large, so use register offset instead.
1205   return false;
1206 }
1207
/// Thumb "register + imm5" address with the offset scaled by 4; forwards to
/// SelectThumbAddrModeImm5S with a Scale of 4.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
1213
/// Thumb "register + imm5" address with the offset scaled by 2; forwards to
/// SelectThumbAddrModeImm5S with a Scale of 2.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
1219
/// Thumb "register + imm5" address with an unscaled offset; forwards to
/// SelectThumbAddrModeImm5S with a Scale of 1.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
1225
1226 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1227                                             SDValue &Base, SDValue &OffImm) {
1228   if (N.getOpcode() == ISD::FrameIndex) {
1229     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1230     // Only multiples of 4 are allowed for the offset, so the frame object
1231     // alignment must be at least 4.
1232     MachineFrameInfo *MFI = MF->getFrameInfo();
1233     if (MFI->getObjectAlignment(FI) < 4)
1234       MFI->setObjectAlignment(FI, 4);
1235     Base = CurDAG->getTargetFrameIndex(
1236         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1237     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1238     return true;
1239   }
1240
1241   if (!CurDAG->isBaseWithConstantOffset(N))
1242     return false;
1243
1244   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1245   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1246       (LHSR && LHSR->getReg() == ARM::SP)) {
1247     // If the RHS is + imm8 * scale, fold into addr mode.
1248     int RHSC;
1249     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1250       Base = N.getOperand(0);
1251       if (Base.getOpcode() == ISD::FrameIndex) {
1252         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1253         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1254         // indexed by the LHS must be 4-byte aligned.
1255         MachineFrameInfo *MFI = MF->getFrameInfo();
1256         if (MFI->getObjectAlignment(FI) < 4)
1257           MFI->setObjectAlignment(FI, 4);
1258         Base = CurDAG->getTargetFrameIndex(
1259             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1260       }
1261       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1262       return true;
1263     }
1264   }
1265
1266   return false;
1267 }
1268
1269
1270 //===----------------------------------------------------------------------===//
1271 //                        Thumb 2 Addressing Modes
1272 //===----------------------------------------------------------------------===//
1273
1274
1275 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1276                                             SDValue &Base, SDValue &OffImm) {
1277   // Match simple R + imm12 operands.
1278
1279   // Base only.
1280   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1281       !CurDAG->isBaseWithConstantOffset(N)) {
1282     if (N.getOpcode() == ISD::FrameIndex) {
1283       // Match frame index.
1284       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1285       Base = CurDAG->getTargetFrameIndex(
1286           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1287       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1288       return true;
1289     }
1290
1291     if (N.getOpcode() == ARMISD::Wrapper &&
1292         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
1293       Base = N.getOperand(0);
1294       if (Base.getOpcode() == ISD::TargetConstantPool)
1295         return false;  // We want to select t2LDRpci instead.
1296     } else
1297       Base = N;
1298     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1299     return true;
1300   }
1301
1302   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1303     if (SelectT2AddrModeImm8(N, Base, OffImm))
1304       // Let t2LDRi8 handle (R - imm8).
1305       return false;
1306
1307     int RHSC = (int)RHS->getZExtValue();
1308     if (N.getOpcode() == ISD::SUB)
1309       RHSC = -RHSC;
1310
1311     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1312       Base   = N.getOperand(0);
1313       if (Base.getOpcode() == ISD::FrameIndex) {
1314         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1315         Base = CurDAG->getTargetFrameIndex(
1316             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1317       }
1318       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1319       return true;
1320     }
1321   }
1322
1323   // Base only.
1324   Base = N;
1325   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1326   return true;
1327 }
1328
1329 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1330                                            SDValue &Base, SDValue &OffImm) {
1331   // Match simple R - imm8 operands.
1332   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1333       !CurDAG->isBaseWithConstantOffset(N))
1334     return false;
1335
1336   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1337     int RHSC = (int)RHS->getSExtValue();
1338     if (N.getOpcode() == ISD::SUB)
1339       RHSC = -RHSC;
1340
1341     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1342       Base = N.getOperand(0);
1343       if (Base.getOpcode() == ISD::FrameIndex) {
1344         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1345         Base = CurDAG->getTargetFrameIndex(
1346             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1347       }
1348       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1349       return true;
1350     }
1351   }
1352
1353   return false;
1354 }
1355
1356 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1357                                                  SDValue &OffImm){
1358   unsigned Opcode = Op->getOpcode();
1359   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1360     ? cast<LoadSDNode>(Op)->getAddressingMode()
1361     : cast<StoreSDNode>(Op)->getAddressingMode();
1362   int RHSC;
1363   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1364     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1365       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1366       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1367     return true;
1368   }
1369
1370   return false;
1371 }
1372
1373 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1374                                             SDValue &Base,
1375                                             SDValue &OffReg, SDValue &ShImm) {
1376   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1377   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1378     return false;
1379
1380   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1381   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1382     int RHSC = (int)RHS->getZExtValue();
1383     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1384       return false;
1385     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1386       return false;
1387   }
1388
1389   // Look for (R + R) or (R + (R << [1,2,3])).
1390   unsigned ShAmt = 0;
1391   Base   = N.getOperand(0);
1392   OffReg = N.getOperand(1);
1393
1394   // Swap if it is ((R << c) + R).
1395   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1396   if (ShOpcVal != ARM_AM::lsl) {
1397     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1398     if (ShOpcVal == ARM_AM::lsl)
1399       std::swap(Base, OffReg);
1400   }
1401
1402   if (ShOpcVal == ARM_AM::lsl) {
1403     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1404     // it.
1405     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1406       ShAmt = Sh->getZExtValue();
1407       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1408         OffReg = OffReg.getOperand(0);
1409       else {
1410         ShAmt = 0;
1411       }
1412     }
1413   }
1414
1415   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1416   // and use it in a shifted operand do so.
1417   if (OffReg.getOpcode() == ISD::MUL) {
1418     unsigned PowerOfTwo = 0;
1419     SDValue NewMulConst;
1420     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1421       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1422       ShAmt = PowerOfTwo;
1423     }
1424   }
1425
1426   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1427
1428   return true;
1429 }
1430
1431 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1432                                                 SDValue &OffImm) {
1433   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1434   // instructions.
1435   Base = N;
1436   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1437
1438   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1439     return true;
1440
1441   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1442   if (!RHS)
1443     return true;
1444
1445   uint32_t RHSC = (int)RHS->getZExtValue();
1446   if (RHSC > 1020 || RHSC % 4 != 0)
1447     return true;
1448
1449   Base = N.getOperand(0);
1450   if (Base.getOpcode() == ISD::FrameIndex) {
1451     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1452     Base = CurDAG->getTargetFrameIndex(
1453         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1454   }
1455
1456   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1457   return true;
1458 }
1459
1460 //===--------------------------------------------------------------------===//
1461
/// getAL - Returns an i32 target constant node holding ARMCC::AL, created in
/// \p CurDAG at location \p dl.
static inline SDValue getAL(SelectionDAG *CurDAG, SDLoc dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
1466
1467 SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
1468   LoadSDNode *LD = cast<LoadSDNode>(N);
1469   ISD::MemIndexedMode AM = LD->getAddressingMode();
1470   if (AM == ISD::UNINDEXED)
1471     return nullptr;
1472
1473   EVT LoadedVT = LD->getMemoryVT();
1474   SDValue Offset, AMOpc;
1475   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1476   unsigned Opcode = 0;
1477   bool Match = false;
1478   if (LoadedVT == MVT::i32 && isPre &&
1479       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1480     Opcode = ARM::LDR_PRE_IMM;
1481     Match = true;
1482   } else if (LoadedVT == MVT::i32 && !isPre &&
1483       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1484     Opcode = ARM::LDR_POST_IMM;
1485     Match = true;
1486   } else if (LoadedVT == MVT::i32 &&
1487       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1488     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1489     Match = true;
1490
1491   } else if (LoadedVT == MVT::i16 &&
1492              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1493     Match = true;
1494     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1495       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1496       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1497   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1498     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1499       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1500         Match = true;
1501         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1502       }
1503     } else {
1504       if (isPre &&
1505           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1506         Match = true;
1507         Opcode = ARM::LDRB_PRE_IMM;
1508       } else if (!isPre &&
1509                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1510         Match = true;
1511         Opcode = ARM::LDRB_POST_IMM;
1512       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1513         Match = true;
1514         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1515       }
1516     }
1517   }
1518
1519   if (Match) {
1520     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1521       SDValue Chain = LD->getChain();
1522       SDValue Base = LD->getBasePtr();
1523       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1524                        CurDAG->getRegister(0, MVT::i32), Chain };
1525       return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1526                                     MVT::i32, MVT::Other, Ops);
1527     } else {
1528       SDValue Chain = LD->getChain();
1529       SDValue Base = LD->getBasePtr();
1530       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1531                        CurDAG->getRegister(0, MVT::i32), Chain };
1532       return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1533                                     MVT::i32, MVT::Other, Ops);
1534     }
1535   }
1536
1537   return nullptr;
1538 }
1539
1540 SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
1541   LoadSDNode *LD = cast<LoadSDNode>(N);
1542   ISD::MemIndexedMode AM = LD->getAddressingMode();
1543   if (AM == ISD::UNINDEXED)
1544     return nullptr;
1545
1546   EVT LoadedVT = LD->getMemoryVT();
1547   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1548   SDValue Offset;
1549   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1550   unsigned Opcode = 0;
1551   bool Match = false;
1552   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1553     switch (LoadedVT.getSimpleVT().SimpleTy) {
1554     case MVT::i32:
1555       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1556       break;
1557     case MVT::i16:
1558       if (isSExtLd)
1559         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1560       else
1561         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1562       break;
1563     case MVT::i8:
1564     case MVT::i1:
1565       if (isSExtLd)
1566         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1567       else
1568         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1569       break;
1570     default:
1571       return nullptr;
1572     }
1573     Match = true;
1574   }
1575
1576   if (Match) {
1577     SDValue Chain = LD->getChain();
1578     SDValue Base = LD->getBasePtr();
1579     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1580                      CurDAG->getRegister(0, MVT::i32), Chain };
1581     return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1582                                   MVT::Other, Ops);
1583   }
1584
1585   return nullptr;
1586 }
1587
1588 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1589 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1590   SDLoc dl(V0.getNode());
1591   SDValue RegClass =
1592     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1593   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1594   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1595   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1596   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1597 }
1598
1599 /// \brief Form a D register from a pair of S registers.
1600 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1601   SDLoc dl(V0.getNode());
1602   SDValue RegClass =
1603     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1604   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1605   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1606   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1607   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1608 }
1609
1610 /// \brief Form a quad register from a pair of D registers.
1611 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1612   SDLoc dl(V0.getNode());
1613   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1614                                                MVT::i32);
1615   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1616   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1617   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1618   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1619 }
1620
1621 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1622 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1623   SDLoc dl(V0.getNode());
1624   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1625                                                MVT::i32);
1626   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1627   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1628   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1629   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1630 }
1631
1632 /// \brief Form 4 consecutive S registers.
1633 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1634                                    SDValue V2, SDValue V3) {
1635   SDLoc dl(V0.getNode());
1636   SDValue RegClass =
1637     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1638   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1639   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1640   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1641   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1642   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1643                                     V2, SubReg2, V3, SubReg3 };
1644   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1645 }
1646
1647 /// \brief Form 4 consecutive D registers.
1648 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1649                                    SDValue V2, SDValue V3) {
1650   SDLoc dl(V0.getNode());
1651   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1652                                                MVT::i32);
1653   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1654   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1655   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1656   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1657   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1658                                     V2, SubReg2, V3, SubReg3 };
1659   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1660 }
1661
1662 /// \brief Form 4 consecutive Q registers.
1663 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1664                                    SDValue V2, SDValue V3) {
1665   SDLoc dl(V0.getNode());
1666   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1667                                                MVT::i32);
1668   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1669   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1670   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1671   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1672   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1673                                     V2, SubReg2, V3, SubReg3 };
1674   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1675 }
1676
1677 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1678 /// of a NEON VLD or VST instruction.  The supported values depend on the
1679 /// number of registers being loaded.
1680 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, SDLoc dl,
1681                                        unsigned NumVecs, bool is64BitVector) {
1682   unsigned NumRegs = NumVecs;
1683   if (!is64BitVector && NumVecs < 3)
1684     NumRegs *= 2;
1685
1686   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1687   if (Alignment >= 32 && NumRegs == 4)
1688     Alignment = 32;
1689   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1690     Alignment = 16;
1691   else if (Alignment >= 8)
1692     Alignment = 8;
1693   else
1694     Alignment = 0;
1695
1696   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1697 }
1698
1699 static bool isVLDfixed(unsigned Opc)
1700 {
1701   switch (Opc) {
1702   default: return false;
1703   case ARM::VLD1d8wb_fixed : return true;
1704   case ARM::VLD1d16wb_fixed : return true;
1705   case ARM::VLD1d64Qwb_fixed : return true;
1706   case ARM::VLD1d32wb_fixed : return true;
1707   case ARM::VLD1d64wb_fixed : return true;
1708   case ARM::VLD1d64TPseudoWB_fixed : return true;
1709   case ARM::VLD1d64QPseudoWB_fixed : return true;
1710   case ARM::VLD1q8wb_fixed : return true;
1711   case ARM::VLD1q16wb_fixed : return true;
1712   case ARM::VLD1q32wb_fixed : return true;
1713   case ARM::VLD1q64wb_fixed : return true;
1714   case ARM::VLD2d8wb_fixed : return true;
1715   case ARM::VLD2d16wb_fixed : return true;
1716   case ARM::VLD2d32wb_fixed : return true;
1717   case ARM::VLD2q8PseudoWB_fixed : return true;
1718   case ARM::VLD2q16PseudoWB_fixed : return true;
1719   case ARM::VLD2q32PseudoWB_fixed : return true;
1720   case ARM::VLD2DUPd8wb_fixed : return true;
1721   case ARM::VLD2DUPd16wb_fixed : return true;
1722   case ARM::VLD2DUPd32wb_fixed : return true;
1723   }
1724 }
1725
1726 static bool isVSTfixed(unsigned Opc)
1727 {
1728   switch (Opc) {
1729   default: return false;
1730   case ARM::VST1d8wb_fixed : return true;
1731   case ARM::VST1d16wb_fixed : return true;
1732   case ARM::VST1d32wb_fixed : return true;
1733   case ARM::VST1d64wb_fixed : return true;
1734   case ARM::VST1q8wb_fixed : return true;
1735   case ARM::VST1q16wb_fixed : return true;
1736   case ARM::VST1q32wb_fixed : return true;
1737   case ARM::VST1q64wb_fixed : return true;
1738   case ARM::VST1d64TPseudoWB_fixed : return true;
1739   case ARM::VST1d64QPseudoWB_fixed : return true;
1740   case ARM::VST2d8wb_fixed : return true;
1741   case ARM::VST2d16wb_fixed : return true;
1742   case ARM::VST2d32wb_fixed : return true;
1743   case ARM::VST2q8PseudoWB_fixed : return true;
1744   case ARM::VST2q16PseudoWB_fixed : return true;
1745   case ARM::VST2q32PseudoWB_fixed : return true;
1746   }
1747 }
1748
1749 // Get the register stride update opcode of a VLD/VST instruction that
1750 // is otherwise equivalent to the given fixed stride updating instruction.
1751 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1752   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1753     && "Incorrect fixed stride updating instruction.");
1754   switch (Opc) {
1755   default: break;
1756   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1757   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1758   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1759   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1760   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1761   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1762   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1763   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1764   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1765   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1766   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1767   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1768
1769   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1770   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1771   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1772   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1773   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1774   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1775   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1776   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1777   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1778   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1779
1780   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1781   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1782   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1783   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1784   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1785   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1786
1787   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1788   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1789   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1790   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1791   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1792   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1793
1794   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1795   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1796   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1797   }
1798   return Opc; // If not one we handle, return it unchanged.
1799 }
1800
/// Select machine node(s) for a NEON multi-vector load node \p N.
///
/// \param N          The node being selected; it is cast to
///                   MemIntrinsicSDNode to obtain its memory operand.
/// \param isUpdating True for the address-writeback form.  The address is
///                   then operand 1 and the increment operand 2; otherwise
///                   the address is operand 2.
/// \param NumVecs    Number of vectors loaded (1-4).
/// \param DOpcodes   Opcode table used when the result type is a 64-bit
///                   vector.
/// \param QOpcodes0  Opcode table used for other vectors when NumVecs <= 2,
///                   and for the first of the two loads otherwise.
/// \param QOpcodes1  Opcode table used for the second of the two loads.
///
/// \returns nullptr if SelectAddrMode6 rejects the address.  For
/// NumVecs == 1 the new load node is returned.  Otherwise the uses of N's
/// results are replaced here and nullptr is returned.
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  // NOTE(review): the non-updating form presumably carries an intrinsic ID at
  // operand 1, pushing the address to operand 2 -- confirm against callers.
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return nullptr;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables.  The double- and quad-register types with
  // the same element size share an index; which table is consulted is
  // decided later from is64BitVector and NumVecs.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3;
    assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
    break;
  }

  // A multi-vector load produces one wide value: a vector of i64 with one
  // element per vector loaded (three is rounded up to four), doubled when the
  // vectors are not 64-bit.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  // Result list of the final load: the loaded value, the i32 written-back
  // address when updating, and the chain.
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
      // case entirely when the rest are updated to that form, too.
      // A non-constant increment needs the register-stride opcode.
      if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
      // check for that explicitly too. Horribly hacky, but temporary.
      // An increment operand is pushed when the increment is not a constant
      // (the increment itself), or when NumVecs > 2 and the opcode is not in
      // isVLDfixed (Reg0 stands in for a constant increment).
      if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
          !isa<ConstantSDNode>(Inc.getNode()))
        Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    // The first load takes an IMPLICIT_DEF of the wide type as its register
    // input; its results are (wide value, updated address, chain).
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    // The second load uses the first load's updated address and takes the
    // first load's wide value as its register input.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  // Only the final node VLd receives the memory operand; in the two-load
  // path the first load (VLdA) is left without one.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);

  if (NumVecs == 1)
    return VLd;

  // Extract out the subregisters.
  // Each vector result of N becomes a subregister extract of the wide value;
  // the indices are consecutive starting at dsub_0 or qsub_0.
  SDValue SuperReg = SDValue(VLd, 0);
  assert(ARM::dsub_7 == ARM::dsub_0+7 &&
         ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // The results of N after the vectors map one-to-one onto results 1 (and 2,
  // when updating) of the new load.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  return nullptr;
}
1933
/// Select machine node(s) for a NEON multi-vector store node \p N.
///
/// \param N          The node being selected; it is cast to
///                   MemIntrinsicSDNode to obtain its memory operand.
/// \param isUpdating True for the address-writeback form.  The address is
///                   then operand 1 and the increment operand 2; otherwise
///                   the address is operand 2.  In both forms the first
///                   vector to store is operand 3.
/// \param NumVecs    Number of vectors stored (1-4).
/// \param DOpcodes   Opcode table used when the vectors are 64-bit.
/// \param QOpcodes0  Opcode table used for other vectors when NumVecs <= 2,
///                   and for the first of the two stores otherwise.
/// \param QOpcodes1  Opcode table used for the second of the two stores.
///
/// \returns nullptr if SelectAddrMode6 rejects the address; otherwise the
/// store node (the second one when two stores are emitted).
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return nullptr;

  // The same one-element memory operand array is attached to every store
  // node created below.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables.  The double- and quad-register types with
  // the same element size share an index; which table is consulted is
  // decided later from is64BitVector and NumVecs.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3;
    assert(NumVecs == 1 && "v2i64 type only supported for VST1");
    break;
  }

  // Result list of the final store: the i32 written-back address when
  // updating, and the chain.
  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    // Build the single register operand holding everything to be stored.
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
      // case entirely when the rest are updated to that form, too.
      // A non-constant increment needs the register-stride opcode.
      if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      // FIXME: We use a VST1 for v1i64 even if the pseudo says vst2/3/4, so
      // check for that explicitly too. Horribly hacky, but temporary.
      // A non-constant increment is passed as an operand.  A constant one is
      // represented by Reg0, and only when NumVecs > 2 and the opcode is not
      // in isVSTfixed; otherwise no increment operand is pushed.
      if  (!isa<ConstantSDNode>(Inc.getNode()))
        Ops.push_back(Inc);
      else if (NumVecs > 2 && !isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);

    return VSt;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  // For NumVecs == 3 the fourth slot is filled with an IMPLICIT_DEF.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  // Its results are (updated address, chain).
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  // The second store uses the first store's updated address and the same
  // register sequence.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
  return VStB;
}
2080
/// Select a machine node for a NEON single-lane multi-vector load or store
/// node \p N.
///
/// \param N          The node being selected; it is cast to
///                   MemIntrinsicSDNode to obtain its memory operand.
/// \param IsLoad     True for a lane load, false for a lane store.
/// \param isUpdating True for the address-writeback form.  The address is
///                   then operand 1 and the increment operand 2; otherwise
///                   the address is operand 2.  In both forms the vectors
///                   start at operand 3 and the constant lane number follows
///                   the last vector.
/// \param NumVecs    Number of vectors involved (2-4).
/// \param DOpcodes   Opcode table used when the vectors are 64-bit.
/// \param QOpcodes   Opcode table used otherwise.
///
/// \returns nullptr if SelectAddrMode6 rejects the address.  For a store the
/// new node is returned.  For a load the uses of N's results are replaced
/// here and nullptr is returned.
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
                                         bool isUpdating, unsigned NumVecs,
                                         const uint16_t *DOpcodes,
                                         const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return nullptr;

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Compute the alignment operand.  It stays 0 when NumVecs == 3.  Otherwise
  // the requested alignment is capped at NumBytes (NumVecs elements' worth of
  // bytes) and dropped to 0 if it is below both 8 and NumBytes.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    // (x & -x) keeps only the lowest set bit.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Index into DOpcodes (double-register types) or QOpcodes (quad-register
  // types).  Any type not listed here reaches llvm_unreachable.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Result list: for loads, one wide vector of i64 (one element per vector,
  // three rounded up to four, doubled when the vectors are not 64-bit); then
  // the i32 written-back address when updating; then the chain.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    // A constant increment is represented by Reg0; otherwise the increment
    // value itself is passed.
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
  }

  // Combine the input vectors into one register-sequence operand.  For
  // NumVecs == 3 the fourth slot is filled with an IMPLICIT_DEF.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  // Despite its name, VLdLn is the new node for stores as well as loads.
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
  if (!IsLoad)
    return VLdLn;

  // Extract the subregisters.
  // Each vector result of N becomes a subregister extract of the wide value;
  // the indices are consecutive starting at dsub_0 or qsub_0.
  SuperReg = SDValue(VLdLn, 0);
  assert(ARM::dsub_7 == ARM::dsub_0+7 &&
         ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // The results of N after the vectors map one-to-one onto results 1 (and 2,
  // when updating) of the new node.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  return nullptr;
}
2199
/// Select a machine node for a NEON load-and-duplicate node \p N.
///
/// \param N          The node being selected; it is cast to
///                   MemIntrinsicSDNode to obtain its memory operand.  The
///                   address is always operand 1; when updating, the
///                   increment is operand 2.
/// \param isUpdating True for the address-writeback form.
/// \param NumVecs    Number of vectors loaded (2-4).
/// \param Opcodes    Opcode table indexed by element type (v8i8, v4i16,
///                   v2f32/v2i32); any other type reaches llvm_unreachable.
///
/// \returns nullptr in every case: either SelectAddrMode6 rejected the
/// address, or the uses of N's results were replaced here.
SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *Opcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
    return nullptr;

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Compute the alignment operand.  It stays 0 when NumVecs == 3.  Otherwise
  // the requested alignment is capped at NumBytes (NumVecs elements' worth of
  // bytes) and dropped to 0 if it is below both 8 and NumBytes.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    // (x & -x) keeps only the lowest set bit.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  }

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDValue SuperReg;
  unsigned Opc = Opcodes[OpcodeIndex];
  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    // fixed-stride update instructions don't have an explicit writeback
    // operand. It's implicit in the opcode itself.
    SDValue Inc = N->getOperand(2);
    if (!isa<ConstantSDNode>(Inc.getNode()))
      Ops.push_back(Inc);
    // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
    else if (NumVecs > 2)
      Ops.push_back(Reg0);
  }
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  // Result list: one wide vector of i64 (one element per vector, three
  // rounded up to four); then the i32 written-back address when updating;
  // then the chain.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  std::vector<EVT> ResTys;
  ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);
  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
  SuperReg = SDValue(VLdDup, 0);

  // Extract the subregisters.
  // Each vector result of N becomes a subregister extract of the wide value;
  // the indices are consecutive starting at dsub_0.
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  unsigned SubIdx = ARM::dsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
  // The results of N after the vectors map one-to-one onto results 1 (and 2,
  // when updating) of the new load.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  return nullptr;
}
2282
2283 SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
2284                                     unsigned Opc) {
2285   assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
2286   SDLoc dl(N);
2287   EVT VT = N->getValueType(0);
2288   unsigned FirstTblReg = IsExt ? 2 : 1;
2289
2290   // Form a REG_SEQUENCE to force register allocation.
2291   SDValue RegSeq;
2292   SDValue V0 = N->getOperand(FirstTblReg + 0);
2293   SDValue V1 = N->getOperand(FirstTblReg + 1);
2294   if (NumVecs == 2)
2295     RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
2296   else {
2297     SDValue V2 = N->getOperand(FirstTblReg + 2);
2298     // If it's a vtbl3, form a quad D-register and leave the last part as
2299     // an undef.
2300     SDValue V3 = (NumVecs == 3)
2301       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2302       : N->getOperand(FirstTblReg + 3);
2303     RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2304   }
2305
2306   SmallVector<SDValue, 6> Ops;
2307   if (IsExt)
2308     Ops.push_back(N->getOperand(1));
2309   Ops.push_back(RegSeq);
2310   Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2311   Ops.push_back(getAL(CurDAG, dl)); // predicate
2312   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2313   return CurDAG->getMachineNode(Opc, dl, VT, Ops);
2314 }
2315
2316 SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
2317                                                      bool isSigned) {
2318   if (!Subtarget->hasV6T2Ops())
2319     return nullptr;
2320
2321   unsigned Opc = isSigned
2322     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2323     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2324   SDLoc dl(N);
2325
2326   // For unsigned extracts, check for a shift right and mask
2327   unsigned And_imm = 0;
2328   if (N->getOpcode() == ISD::AND) {
2329     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2330
2331       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2332       if (And_imm & (And_imm + 1))
2333         return nullptr;
2334
2335       unsigned Srl_imm = 0;
2336       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2337                                 Srl_imm)) {
2338         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2339
2340         // Note: The width operand is encoded as width-1.
2341         unsigned Width = countTrailingOnes(And_imm) - 1;
2342         unsigned LSB = Srl_imm;
2343
2344         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2345
2346         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2347           // It's cheaper to use a right shift to extract the top bits.
2348           if (Subtarget->isThumb()) {
2349             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2350             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2351                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2352                               getAL(CurDAG, dl), Reg0, Reg0 };
2353             return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2354           }
2355
2356           // ARM models shift instructions as MOVsi with shifter operand.
2357           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2358           SDValue ShOpc =
2359             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2360                                       MVT::i32);
2361           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2362                             getAL(CurDAG, dl), Reg0, Reg0 };
2363           return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2364         }
2365
2366         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2367                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2368                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2369                           getAL(CurDAG, dl), Reg0 };
2370         return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2371       }
2372     }
2373     return nullptr;
2374   }
2375
2376   // Otherwise, we're looking for a shift of a shift
2377   unsigned Shl_imm = 0;
2378   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2379     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2380     unsigned Srl_imm = 0;
2381     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2382       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2383       // Note: The width operand is encoded as width-1.
2384       unsigned Width = 32 - Srl_imm - 1;
2385       int LSB = Srl_imm - Shl_imm;
2386       if (LSB < 0)
2387         return nullptr;
2388       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2389       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2390                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2391                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2392                         getAL(CurDAG, dl), Reg0 };
2393       return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2394     }
2395   }
2396
2397   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2398     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2399     unsigned LSB = 0;
2400     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2401         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2402       return nullptr;
2403
2404     if (LSB + Width > 32)
2405       return nullptr;
2406
2407     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2408     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2409                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2410                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2411                       getAL(CurDAG, dl), Reg0 };
2412     return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2413   }
2414
2415   return nullptr;
2416 }
2417
2418 /// Target-specific DAG combining for ISD::XOR.
2419 /// Target-independent combining lowers SELECT_CC nodes of the form
2420 /// select_cc setg[ge] X,  0,  X, -X
2421 /// select_cc setgt    X, -1,  X, -X
2422 /// select_cc setl[te] X,  0, -X,  X
2423 /// select_cc setlt    X,  1, -X,  X
2424 /// which represent Integer ABS into:
2425 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2426 /// ARM instruction selection detects the latter and matches it to
2427 /// ARM::ABS or ARM::t2ABS machine node.
2428 SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
2429   SDValue XORSrc0 = N->getOperand(0);
2430   SDValue XORSrc1 = N->getOperand(1);
2431   EVT VT = N->getValueType(0);
2432
2433   if (Subtarget->isThumb1Only())
2434     return nullptr;
2435
2436   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2437     return nullptr;
2438
2439   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2440   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2441   SDValue SRASrc0 = XORSrc1.getOperand(0);
2442   SDValue SRASrc1 = XORSrc1.getOperand(1);
2443   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2444   EVT XType = SRASrc0.getValueType();
2445   unsigned Size = XType.getSizeInBits() - 1;
2446
2447   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2448       XType.isInteger() && SRAConstant != nullptr &&
2449       Size == SRAConstant->getZExtValue()) {
2450     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2451     return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2452   }
2453
2454   return nullptr;
2455 }
2456
2457 SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2458   // The only time a CONCAT_VECTORS operation can have legal types is when
2459   // two 64-bit vectors are concatenated to a 128-bit vector.
2460   EVT VT = N->getValueType(0);
2461   if (!VT.is128BitVector() || N->getNumOperands() != 2)
2462     llvm_unreachable("unexpected CONCAT_VECTORS");
2463   return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
2464 }
2465
2466 SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
2467   SDLoc dl(N);
2468
2469   if (N->isMachineOpcode()) {
2470     N->setNodeId(-1);
2471     return nullptr;   // Already selected.
2472   }
2473
2474   switch (N->getOpcode()) {
2475   default: break;
2476   case ISD::WRITE_REGISTER: {
2477     SDNode *ResNode = SelectWriteRegister(N);
2478     if (ResNode)
2479       return ResNode;
2480     break;
2481   }
2482   case ISD::READ_REGISTER: {
2483     SDNode *ResNode = SelectReadRegister(N);
2484     if (ResNode)
2485       return ResNode;
2486     break;
2487   }
2488   case ISD::INLINEASM: {
2489     SDNode *ResNode = SelectInlineAsm(N);
2490     if (ResNode)
2491       return ResNode;
2492     break;
2493   }
2494   case ISD::XOR: {
2495     // Select special operations if XOR node forms integer ABS pattern
2496     SDNode *ResNode = SelectABSOp(N);
2497     if (ResNode)
2498       return ResNode;
2499     // Other cases are autogenerated.
2500     break;
2501   }
2502   case ISD::Constant: {
2503     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2504     // If we can't materialize the constant we need to use a literal pool
2505     if (ConstantMaterializationCost(Val) > 2) {
2506       SDValue CPIdx = CurDAG->getTargetConstantPool(
2507           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2508           TLI->getPointerTy(CurDAG->getDataLayout()));
2509
2510       SDNode *ResNode;
2511       if (Subtarget->isThumb()) {
2512         SDValue Pred = getAL(CurDAG, dl);
2513         SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2514         SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2515         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2516                                          Ops);
2517       } else {
2518         SDValue Ops[] = {
2519           CPIdx,
2520           CurDAG->getTargetConstant(0, dl, MVT::i32),
2521           getAL(CurDAG, dl),
2522           CurDAG->getRegister(0, MVT::i32),
2523           CurDAG->getEntryNode()
2524         };
2525         ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2526                                        Ops);
2527       }
2528       ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
2529       return nullptr;
2530     }
2531
2532     // Other cases are autogenerated.
2533     break;
2534   }
2535   case ISD::FrameIndex: {
2536     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2537     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2538     SDValue TFI = CurDAG->getTargetFrameIndex(
2539         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2540     if (Subtarget->isThumb1Only()) {
2541       // Set the alignment of the frame object to 4, to avoid having to generate
2542       // more than one ADD
2543       MachineFrameInfo *MFI = MF->getFrameInfo();
2544       if (MFI->getObjectAlignment(FI) < 4)
2545         MFI->setObjectAlignment(FI, 4);
2546       return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2547                                   CurDAG->getTargetConstant(0, dl, MVT::i32));
2548     } else {
2549       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2550                       ARM::t2ADDri : ARM::ADDri);
2551       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2552                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2553                         CurDAG->getRegister(0, MVT::i32) };
2554       return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2555     }
2556   }
2557   case ISD::SRL:
2558     if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2559       return I;
2560     break;
2561   case ISD::SIGN_EXTEND_INREG:
2562   case ISD::SRA:
2563     if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
2564       return I;
2565     break;
2566   case ISD::MUL:
2567     if (Subtarget->isThumb1Only())
2568       break;
2569     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2570       unsigned RHSV = C->getZExtValue();
2571       if (!RHSV) break;
2572       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2573         unsigned ShImm = Log2_32(RHSV-1);
2574         if (ShImm >= 32)
2575           break;
2576         SDValue V = N->getOperand(0);
2577         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2578         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2579         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2580         if (Subtarget->isThumb()) {
2581           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2582           return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2583         } else {
2584           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2585                             Reg0 };
2586           return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2587         }
2588       }
2589       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2590         unsigned ShImm = Log2_32(RHSV+1);
2591         if (ShImm >= 32)
2592           break;
2593         SDValue V = N->getOperand(0);
2594         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2595         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2596         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2597         if (Subtarget->isThumb()) {
2598           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2599           return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2600         } else {
2601           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2602                             Reg0 };
2603           return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2604         }
2605       }
2606     }
2607     break;
2608   case ISD::AND: {
2609     // Check for unsigned bitfield extract
2610     if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2611       return I;
2612
2613     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2614     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2615     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2616     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2617     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2618     EVT VT = N->getValueType(0);
2619     if (VT != MVT::i32)
2620       break;
2621     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2622       ? ARM::t2MOVTi16
2623       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2624     if (!Opc)
2625       break;
2626     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2627     ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2628     if (!N1C)
2629       break;
2630     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2631       SDValue N2 = N0.getOperand(1);
2632       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2633       if (!N2C)
2634         break;
2635       unsigned N1CVal = N1C->getZExtValue();
2636       unsigned N2CVal = N2C->getZExtValue();
2637       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2638           (N1CVal & 0xffffU) == 0xffffU &&
2639           (N2CVal & 0xffffU) == 0x0U) {
2640         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2641                                                   dl, MVT::i32);
2642         SDValue Ops[] = { N0.getOperand(0), Imm16,
2643                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2644         return CurDAG->getMachineNode(Opc, dl, VT, Ops);
2645       }
2646     }
2647     break;
2648   }
2649   case ARMISD::VMOVRRD:
2650     return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
2651                                   N->getOperand(0), getAL(CurDAG, dl),
2652                                   CurDAG->getRegister(0, MVT::i32));
2653   case ISD::UMUL_LOHI: {
2654     if (Subtarget->isThumb1Only())
2655       break;
2656     if (Subtarget->isThumb()) {
2657       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2658                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2659       return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
2660     } else {
2661       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2662                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2663                         CurDAG->getRegister(0, MVT::i32) };
2664       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2665                                     ARM::UMULL : ARM::UMULLv5,
2666                                     dl, MVT::i32, MVT::i32, Ops);
2667     }
2668   }
2669   case ISD::SMUL_LOHI: {
2670     if (Subtarget->isThumb1Only())
2671       break;
2672     if (Subtarget->isThumb()) {
2673       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2674                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2675       return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
2676     } else {
2677       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2678                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2679                         CurDAG->getRegister(0, MVT::i32) };
2680       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2681                                     ARM::SMULL : ARM::SMULLv5,
2682                                     dl, MVT::i32, MVT::i32, Ops);
2683     }
2684   }
2685   case ARMISD::UMLAL:{
2686     if (Subtarget->isThumb()) {
2687       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2688                         N->getOperand(3), getAL(CurDAG, dl),
2689                         CurDAG->getRegister(0, MVT::i32)};
2690       return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
2691     }else{
2692       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2693                         N->getOperand(3), getAL(CurDAG, dl),
2694                         CurDAG->getRegister(0, MVT::i32),
2695                         CurDAG->getRegister(0, MVT::i32) };
2696       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2697                                       ARM::UMLAL : ARM::UMLALv5,
2698                                       dl, MVT::i32, MVT::i32, Ops);
2699     }
2700   }
2701   case ARMISD::SMLAL:{
2702     if (Subtarget->isThumb()) {
2703       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2704                         N->getOperand(3), getAL(CurDAG, dl),
2705                         CurDAG->getRegister(0, MVT::i32)};
2706       return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
2707     }else{
2708       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2709                         N->getOperand(3), getAL(CurDAG, dl),
2710                         CurDAG->getRegister(0, MVT::i32),
2711                         CurDAG->getRegister(0, MVT::i32) };
2712       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2713                                       ARM::SMLAL : ARM::SMLALv5,
2714                                       dl, MVT::i32, MVT::i32, Ops);
2715     }
2716   }
2717   case ISD::LOAD: {
2718     SDNode *ResNode = nullptr;
2719     if (Subtarget->isThumb() && Subtarget->hasThumb2())
2720       ResNode = SelectT2IndexedLoad(N);
2721     else
2722       ResNode = SelectARMIndexedLoad(N);
2723     if (ResNode)
2724       return ResNode;
2725     // Other cases are autogenerated.
2726     break;
2727   }
2728   case ARMISD::BRCOND: {
2729     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2730     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2731     // Pattern complexity = 6  cost = 1  size = 0
2732
2733     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2734     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2735     // Pattern complexity = 6  cost = 1  size = 0
2736
2737     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2738     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2739     // Pattern complexity = 6  cost = 1  size = 0
2740
2741     unsigned Opc = Subtarget->isThumb() ?
2742       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2743     SDValue Chain = N->getOperand(0);
2744     SDValue N1 = N->getOperand(1);
2745     SDValue N2 = N->getOperand(2);
2746     SDValue N3 = N->getOperand(3);
2747     SDValue InFlag = N->getOperand(4);
2748     assert(N1.getOpcode() == ISD::BasicBlock);
2749     assert(N2.getOpcode() == ISD::Constant);
2750     assert(N3.getOpcode() == ISD::Register);
2751
2752     SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
2753                                cast<ConstantSDNode>(N2)->getZExtValue()), dl,
2754                                MVT::i32);
2755     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
2756     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
2757                                              MVT::Glue, Ops);
2758     Chain = SDValue(ResNode, 0);
2759     if (N->getNumValues() == 2) {
2760       InFlag = SDValue(ResNode, 1);
2761       ReplaceUses(SDValue(N, 1), InFlag);
2762     }
2763     ReplaceUses(SDValue(N, 0),
2764                 SDValue(Chain.getNode(), Chain.getResNo()));
2765     return nullptr;
2766   }
2767   case ARMISD::VZIP: {
2768     unsigned Opc = 0;
2769     EVT VT = N->getValueType(0);
2770     switch (VT.getSimpleVT().SimpleTy) {
2771     default: return nullptr;
2772     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
2773     case MVT::v4i16: Opc = ARM::VZIPd16; break;
2774     case MVT::v2f32:
2775     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2776     case MVT::v2i32: Opc = ARM::VTRNd32; break;
2777     case MVT::v16i8: Opc = ARM::VZIPq8; break;
2778     case MVT::v8i16: Opc = ARM::VZIPq16; break;
2779     case MVT::v4f32:
2780     case MVT::v4i32: Opc = ARM::VZIPq32; break;
2781     }
2782     SDValue Pred = getAL(CurDAG, dl);
2783     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2784     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2785     return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2786   }
2787   case ARMISD::VUZP: {
2788     unsigned Opc = 0;
2789     EVT VT = N->getValueType(0);
2790     switch (VT.getSimpleVT().SimpleTy) {
2791     default: return nullptr;
2792     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
2793     case MVT::v4i16: Opc = ARM::VUZPd16; break;
2794     case MVT::v2f32:
2795     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2796     case MVT::v2i32: Opc = ARM::VTRNd32; break;
2797     case MVT::v16i8: Opc = ARM::VUZPq8; break;
2798     case MVT::v8i16: Opc = ARM::VUZPq16; break;
2799     case MVT::v4f32:
2800     case MVT::v4i32: Opc = ARM::VUZPq32; break;
2801     }
2802     SDValue Pred = getAL(CurDAG, dl);
2803     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2804     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2805     return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2806   }
2807   case ARMISD::VTRN: {
2808     unsigned Opc = 0;
2809     EVT VT = N->getValueType(0);
2810     switch (VT.getSimpleVT().SimpleTy) {
2811     default: return nullptr;
2812     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
2813     case MVT::v4i16: Opc = ARM::VTRNd16; break;
2814     case MVT::v2f32:
2815     case MVT::v2i32: Opc = ARM::VTRNd32; break;
2816     case MVT::v16i8: Opc = ARM::VTRNq8; break;
2817     case MVT::v8i16: Opc = ARM::VTRNq16; break;
2818     case MVT::v4f32:
2819     case MVT::v4i32: Opc = ARM::VTRNq32; break;
2820     }
2821     SDValue Pred = getAL(CurDAG, dl);
2822     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2823     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2824     return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2825   }
2826   case ARMISD::BUILD_VECTOR: {
2827     EVT VecVT = N->getValueType(0);
2828     EVT EltVT = VecVT.getVectorElementType();
2829     unsigned NumElts = VecVT.getVectorNumElements();
2830     if (EltVT == MVT::f64) {
2831       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
2832       return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2833     }
2834     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
2835     if (NumElts == 2)
2836       return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2837     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
2838     return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
2839                      N->getOperand(2), N->getOperand(3));
2840   }
2841
2842   case ARMISD::VLD2DUP: {
2843     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
2844                                         ARM::VLD2DUPd32 };
2845     return SelectVLDDup(N, false, 2, Opcodes);
2846   }
2847
2848   case ARMISD::VLD3DUP: {
2849     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
2850                                         ARM::VLD3DUPd16Pseudo,
2851                                         ARM::VLD3DUPd32Pseudo };
2852     return SelectVLDDup(N, false, 3, Opcodes);
2853   }
2854
2855   case ARMISD::VLD4DUP: {
2856     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
2857                                         ARM::VLD4DUPd16Pseudo,
2858                                         ARM::VLD4DUPd32Pseudo };
2859     return SelectVLDDup(N, false, 4, Opcodes);
2860   }
2861
2862   case ARMISD::VLD2DUP_UPD: {
2863     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
2864                                         ARM::VLD2DUPd16wb_fixed,
2865                                         ARM::VLD2DUPd32wb_fixed };
2866     return SelectVLDDup(N, true, 2, Opcodes);
2867   }
2868
2869   case ARMISD::VLD3DUP_UPD: {
2870     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
2871                                         ARM::VLD3DUPd16Pseudo_UPD,
2872                                         ARM::VLD3DUPd32Pseudo_UPD };
2873     return SelectVLDDup(N, true, 3, Opcodes);
2874   }
2875
2876   case ARMISD::VLD4DUP_UPD: {
2877     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
2878                                         ARM::VLD4DUPd16Pseudo_UPD,
2879                                         ARM::VLD4DUPd32Pseudo_UPD };
2880     return SelectVLDDup(N, true, 4, Opcodes);
2881   }
2882
2883   case ARMISD::VLD1_UPD: {
2884     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
2885                                          ARM::VLD1d16wb_fixed,
2886                                          ARM::VLD1d32wb_fixed,
2887                                          ARM::VLD1d64wb_fixed };
2888     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
2889                                          ARM::VLD1q16wb_fixed,
2890                                          ARM::VLD1q32wb_fixed,
2891                                          ARM::VLD1q64wb_fixed };
2892     return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
2893   }
2894
2895   case ARMISD::VLD2_UPD: {
2896     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
2897                                          ARM::VLD2d16wb_fixed,
2898                                          ARM::VLD2d32wb_fixed,
2899                                          ARM::VLD1q64wb_fixed};
2900     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
2901                                          ARM::VLD2q16PseudoWB_fixed,
2902                                          ARM::VLD2q32PseudoWB_fixed };
2903     return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
2904   }
2905
2906   case ARMISD::VLD3_UPD: {
2907     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
2908                                          ARM::VLD3d16Pseudo_UPD,
2909                                          ARM::VLD3d32Pseudo_UPD,
2910                                          ARM::VLD1d64TPseudoWB_fixed};
2911     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
2912                                           ARM::VLD3q16Pseudo_UPD,
2913                                           ARM::VLD3q32Pseudo_UPD };
2914     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
2915                                           ARM::VLD3q16oddPseudo_UPD,
2916                                           ARM::VLD3q32oddPseudo_UPD };
2917     return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
2918   }
2919
2920   case ARMISD::VLD4_UPD: {
2921     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
2922                                          ARM::VLD4d16Pseudo_UPD,
2923                                          ARM::VLD4d32Pseudo_UPD,
2924                                          ARM::VLD1d64QPseudoWB_fixed};
2925     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
2926                                           ARM::VLD4q16Pseudo_UPD,
2927                                           ARM::VLD4q32Pseudo_UPD };
2928     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
2929                                           ARM::VLD4q16oddPseudo_UPD,
2930                                           ARM::VLD4q32oddPseudo_UPD };
2931     return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
2932   }
2933
2934   case ARMISD::VLD2LN_UPD: {
2935     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
2936                                          ARM::VLD2LNd16Pseudo_UPD,
2937                                          ARM::VLD2LNd32Pseudo_UPD };
2938     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
2939                                          ARM::VLD2LNq32Pseudo_UPD };
2940     return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
2941   }
2942
2943   case ARMISD::VLD3LN_UPD: {
2944     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
2945                                          ARM::VLD3LNd16Pseudo_UPD,
2946                                          ARM::VLD3LNd32Pseudo_UPD };
2947     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
2948                                          ARM::VLD3LNq32Pseudo_UPD };
2949     return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
2950   }
2951
2952   case ARMISD::VLD4LN_UPD: {
2953     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
2954                                          ARM::VLD4LNd16Pseudo_UPD,
2955                                          ARM::VLD4LNd32Pseudo_UPD };
2956     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
2957                                          ARM::VLD4LNq32Pseudo_UPD };
2958     return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
2959   }
2960
2961   case ARMISD::VST1_UPD: {
2962     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
2963                                          ARM::VST1d16wb_fixed,
2964                                          ARM::VST1d32wb_fixed,
2965                                          ARM::VST1d64wb_fixed };
2966     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
2967                                          ARM::VST1q16wb_fixed,
2968                                          ARM::VST1q32wb_fixed,
2969                                          ARM::VST1q64wb_fixed };
2970     return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
2971   }
2972
2973   case ARMISD::VST2_UPD: {
2974     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
2975                                          ARM::VST2d16wb_fixed,
2976                                          ARM::VST2d32wb_fixed,
2977                                          ARM::VST1q64wb_fixed};
2978     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
2979                                          ARM::VST2q16PseudoWB_fixed,
2980                                          ARM::VST2q32PseudoWB_fixed };
2981     return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
2982   }
2983
2984   case ARMISD::VST3_UPD: {
2985     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
2986                                          ARM::VST3d16Pseudo_UPD,
2987                                          ARM::VST3d32Pseudo_UPD,
2988                                          ARM::VST1d64TPseudoWB_fixed};
2989     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
2990                                           ARM::VST3q16Pseudo_UPD,
2991                                           ARM::VST3q32Pseudo_UPD };
2992     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
2993                                           ARM::VST3q16oddPseudo_UPD,
2994                                           ARM::VST3q32oddPseudo_UPD };
2995     return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
2996   }
2997
2998   case ARMISD::VST4_UPD: {
2999     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3000                                          ARM::VST4d16Pseudo_UPD,
3001                                          ARM::VST4d32Pseudo_UPD,
3002                                          ARM::VST1d64QPseudoWB_fixed};
3003     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3004                                           ARM::VST4q16Pseudo_UPD,
3005                                           ARM::VST4q32Pseudo_UPD };
3006     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3007                                           ARM::VST4q16oddPseudo_UPD,
3008                                           ARM::VST4q32oddPseudo_UPD };
3009     return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3010   }
3011
3012   case ARMISD::VST2LN_UPD: {
3013     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3014                                          ARM::VST2LNd16Pseudo_UPD,
3015                                          ARM::VST2LNd32Pseudo_UPD };
3016     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3017                                          ARM::VST2LNq32Pseudo_UPD };
3018     return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3019   }
3020
3021   case ARMISD::VST3LN_UPD: {
3022     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3023                                          ARM::VST3LNd16Pseudo_UPD,
3024                                          ARM::VST3LNd32Pseudo_UPD };
3025     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3026                                          ARM::VST3LNq32Pseudo_UPD };
3027     return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3028   }
3029
3030   case ARMISD::VST4LN_UPD: {
3031     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3032                                          ARM::VST4LNd16Pseudo_UPD,
3033                                          ARM::VST4LNd32Pseudo_UPD };
3034     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3035                                          ARM::VST4LNq32Pseudo_UPD };
3036     return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3037   }
3038
3039   case ISD::INTRINSIC_VOID:
3040   case ISD::INTRINSIC_W_CHAIN: {
3041     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3042     switch (IntNo) {
3043     default:
3044       break;
3045
3046     case Intrinsic::arm_ldaexd:
3047     case Intrinsic::arm_ldrexd: {
3048       SDLoc dl(N);
3049       SDValue Chain = N->getOperand(0);
3050       SDValue MemAddr = N->getOperand(2);
3051       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3052
3053       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3054       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3055                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3056
3057       // arm_ldrexd returns a i64 value in {i32, i32}
3058       std::vector<EVT> ResTys;
3059       if (isThumb) {
3060         ResTys.push_back(MVT::i32);
3061         ResTys.push_back(MVT::i32);
3062       } else
3063         ResTys.push_back(MVT::Untyped);
3064       ResTys.push_back(MVT::Other);
3065
3066       // Place arguments in the right order.
3067       SmallVector<SDValue, 7> Ops;
3068       Ops.push_back(MemAddr);
3069       Ops.push_back(getAL(CurDAG, dl));
3070       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3071       Ops.push_back(Chain);
3072       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3073       // Transfer memoperands.
3074       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3075       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3076       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3077
3078       // Remap uses.
3079       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3080       if (!SDValue(N, 0).use_empty()) {
3081         SDValue Result;
3082         if (isThumb)
3083           Result = SDValue(Ld, 0);
3084         else {
3085           SDValue SubRegIdx =
3086             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3087           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3088               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3089           Result = SDValue(ResNode,0);
3090         }
3091         ReplaceUses(SDValue(N, 0), Result);
3092       }
3093       if (!SDValue(N, 1).use_empty()) {
3094         SDValue Result;
3095         if (isThumb)
3096           Result = SDValue(Ld, 1);
3097         else {
3098           SDValue SubRegIdx =
3099             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3100           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3101               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3102           Result = SDValue(ResNode,0);
3103         }
3104         ReplaceUses(SDValue(N, 1), Result);
3105       }
3106       ReplaceUses(SDValue(N, 2), OutChain);
3107       return nullptr;
3108     }
3109     case Intrinsic::arm_stlexd:
3110     case Intrinsic::arm_strexd: {
3111       SDLoc dl(N);
3112       SDValue Chain = N->getOperand(0);
3113       SDValue Val0 = N->getOperand(2);
3114       SDValue Val1 = N->getOperand(3);
3115       SDValue MemAddr = N->getOperand(4);
3116
3117       // Store exclusive double return a i32 value which is the return status
3118       // of the issued store.
3119       const EVT ResTys[] = {MVT::i32, MVT::Other};
3120
3121       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3122       // Place arguments in the right order.
3123       SmallVector<SDValue, 7> Ops;
3124       if (isThumb) {
3125         Ops.push_back(Val0);
3126         Ops.push_back(Val1);
3127       } else
3128         // arm_strexd uses GPRPair.
3129         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3130       Ops.push_back(MemAddr);
3131       Ops.push_back(getAL(CurDAG, dl));
3132       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3133       Ops.push_back(Chain);
3134
3135       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3136       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3137                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3138
3139       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3140       // Transfer memoperands.
3141       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3142       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3143       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3144
3145       return St;
3146     }
3147
3148     case Intrinsic::arm_neon_vld1: {
3149       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3150                                            ARM::VLD1d32, ARM::VLD1d64 };
3151       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3152                                            ARM::VLD1q32, ARM::VLD1q64};
3153       return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3154     }
3155
3156     case Intrinsic::arm_neon_vld2: {
3157       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3158                                            ARM::VLD2d32, ARM::VLD1q64 };
3159       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3160                                            ARM::VLD2q32Pseudo };
3161       return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3162     }
3163
3164     case Intrinsic::arm_neon_vld3: {
3165       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3166                                            ARM::VLD3d16Pseudo,
3167                                            ARM::VLD3d32Pseudo,
3168                                            ARM::VLD1d64TPseudo };
3169       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3170                                             ARM::VLD3q16Pseudo_UPD,
3171                                             ARM::VLD3q32Pseudo_UPD };
3172       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3173                                             ARM::VLD3q16oddPseudo,
3174                                             ARM::VLD3q32oddPseudo };
3175       return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3176     }
3177
3178     case Intrinsic::arm_neon_vld4: {
3179       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3180                                            ARM::VLD4d16Pseudo,
3181                                            ARM::VLD4d32Pseudo,
3182                                            ARM::VLD1d64QPseudo };
3183       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3184                                             ARM::VLD4q16Pseudo_UPD,
3185                                             ARM::VLD4q32Pseudo_UPD };
3186       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3187                                             ARM::VLD4q16oddPseudo,
3188                                             ARM::VLD4q32oddPseudo };
3189       return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3190     }
3191
3192     case Intrinsic::arm_neon_vld2lane: {
3193       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3194                                            ARM::VLD2LNd16Pseudo,
3195                                            ARM::VLD2LNd32Pseudo };
3196       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3197                                            ARM::VLD2LNq32Pseudo };
3198       return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3199     }
3200
3201     case Intrinsic::arm_neon_vld3lane: {
3202       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3203                                            ARM::VLD3LNd16Pseudo,
3204                                            ARM::VLD3LNd32Pseudo };
3205       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3206                                            ARM::VLD3LNq32Pseudo };
3207       return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3208     }
3209
3210     case Intrinsic::arm_neon_vld4lane: {
3211       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3212                                            ARM::VLD4LNd16Pseudo,
3213                                            ARM::VLD4LNd32Pseudo };
3214       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3215                                            ARM::VLD4LNq32Pseudo };
3216       return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3217     }
3218
3219     case Intrinsic::arm_neon_vst1: {
3220       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3221                                            ARM::VST1d32, ARM::VST1d64 };
3222       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3223                                            ARM::VST1q32, ARM::VST1q64 };
3224       return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3225     }
3226
3227     case Intrinsic::arm_neon_vst2: {
3228       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3229                                            ARM::VST2d32, ARM::VST1q64 };
3230       static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3231                                      ARM::VST2q32Pseudo };
3232       return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3233     }
3234
3235     case Intrinsic::arm_neon_vst3: {
3236       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3237                                            ARM::VST3d16Pseudo,
3238                                            ARM::VST3d32Pseudo,
3239                                            ARM::VST1d64TPseudo };
3240       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3241                                             ARM::VST3q16Pseudo_UPD,
3242                                             ARM::VST3q32Pseudo_UPD };
3243       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3244                                             ARM::VST3q16oddPseudo,
3245                                             ARM::VST3q32oddPseudo };
3246       return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3247     }
3248
3249     case Intrinsic::arm_neon_vst4: {
3250       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3251                                            ARM::VST4d16Pseudo,
3252                                            ARM::VST4d32Pseudo,
3253                                            ARM::VST1d64QPseudo };
3254       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3255                                             ARM::VST4q16Pseudo_UPD,
3256                                             ARM::VST4q32Pseudo_UPD };
3257       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3258                                             ARM::VST4q16oddPseudo,
3259                                             ARM::VST4q32oddPseudo };
3260       return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3261     }
3262
3263     case Intrinsic::arm_neon_vst2lane: {
3264       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3265                                            ARM::VST2LNd16Pseudo,
3266                                            ARM::VST2LNd32Pseudo };
3267       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3268                                            ARM::VST2LNq32Pseudo };
3269       return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3270     }
3271
3272     case Intrinsic::arm_neon_vst3lane: {
3273       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3274                                            ARM::VST3LNd16Pseudo,
3275                                            ARM::VST3LNd32Pseudo };
3276       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3277                                            ARM::VST3LNq32Pseudo };
3278       return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3279     }
3280
3281     case Intrinsic::arm_neon_vst4lane: {
3282       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3283                                            ARM::VST4LNd16Pseudo,
3284                                            ARM::VST4LNd32Pseudo };
3285       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3286                                            ARM::VST4LNq32Pseudo };
3287       return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3288     }
3289     }
3290     break;
3291   }
3292
3293   case ISD::INTRINSIC_WO_CHAIN: {
3294     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3295     switch (IntNo) {
3296     default:
3297       break;
3298
3299     case Intrinsic::arm_neon_vtbl2:
3300       return SelectVTBL(N, false, 2, ARM::VTBL2);
3301     case Intrinsic::arm_neon_vtbl3:
3302       return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3303     case Intrinsic::arm_neon_vtbl4:
3304       return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3305
3306     case Intrinsic::arm_neon_vtbx2:
3307       return SelectVTBL(N, true, 2, ARM::VTBX2);
3308     case Intrinsic::arm_neon_vtbx3:
3309       return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3310     case Intrinsic::arm_neon_vtbx4:
3311       return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3312     }
3313     break;
3314   }
3315
3316   case ARMISD::VTBL1: {
3317     SDLoc dl(N);
3318     EVT VT = N->getValueType(0);
3319     SmallVector<SDValue, 6> Ops;
3320
3321     Ops.push_back(N->getOperand(0));
3322     Ops.push_back(N->getOperand(1));
3323     Ops.push_back(getAL(CurDAG, dl));                // Predicate
3324     Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3325     return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
3326   }
3327   case ARMISD::VTBL2: {
3328     SDLoc dl(N);
3329     EVT VT = N->getValueType(0);
3330
3331     // Form a REG_SEQUENCE to force register allocation.
3332     SDValue V0 = N->getOperand(0);
3333     SDValue V1 = N->getOperand(1);
3334     SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
3335
3336     SmallVector<SDValue, 6> Ops;
3337     Ops.push_back(RegSeq);
3338     Ops.push_back(N->getOperand(2));
3339     Ops.push_back(getAL(CurDAG, dl));                // Predicate
3340     Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3341     return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
3342   }
3343
3344   case ISD::CONCAT_VECTORS:
3345     return SelectConcatVector(N);
3346   }
3347
3348   return SelectCode(N);
3349 }
3350
3351 // Inspect a register string of the form
3352 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3353 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3354 // and obtain the integer operands from them, adding these operands to the
3355 // provided vector.
3356 static void getIntOperandsFromRegisterString(StringRef RegString,
3357                                              SelectionDAG *CurDAG, SDLoc DL,
3358                                              std::vector<SDValue>& Ops) {
3359   SmallVector<StringRef, 5> Fields;
3360   RegString.split(Fields, ':');
3361
3362   if (Fields.size() > 1) {
3363     bool AllIntFields = true;
3364
3365     for (StringRef Field : Fields) {
3366       // Need to trim out leading 'cp' characters and get the integer field.
3367       unsigned IntField;
3368       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3369       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3370     }
3371
3372     assert(AllIntFields &&
3373             "Unexpected non-integer value in special register string.");
3374   }
3375 }
3376
3377 // Maps a Banked Register string to its mask value. The mask value returned is
3378 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3379 // mask operand, which expresses which register is to be used, e.g. r8, and in
3380 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3381 // was invalid.
3382 static inline int getBankedRegisterMask(StringRef RegString) {
3383   return StringSwitch<int>(RegString.lower())
3384           .Case("r8_usr", 0x00)
3385           .Case("r9_usr", 0x01)
3386           .Case("r10_usr", 0x02)
3387           .Case("r11_usr", 0x03)
3388           .Case("r12_usr", 0x04)
3389           .Case("sp_usr", 0x05)
3390           .Case("lr_usr", 0x06)
3391           .Case("r8_fiq", 0x08)
3392           .Case("r9_fiq", 0x09)
3393           .Case("r10_fiq", 0x0a)
3394           .Case("r11_fiq", 0x0b)
3395           .Case("r12_fiq", 0x0c)
3396           .Case("sp_fiq", 0x0d)
3397           .Case("lr_fiq", 0x0e)
3398           .Case("lr_irq", 0x10)
3399           .Case("sp_irq", 0x11)
3400           .Case("lr_svc", 0x12)
3401           .Case("sp_svc", 0x13)
3402           .Case("lr_abt", 0x14)
3403           .Case("sp_abt", 0x15)
3404           .Case("lr_und", 0x16)
3405           .Case("sp_und", 0x17)
3406           .Case("lr_mon", 0x1c)
3407           .Case("sp_mon", 0x1d)
3408           .Case("elr_hyp", 0x1e)
3409           .Case("sp_hyp", 0x1f)
3410           .Case("spsr_fiq", 0x2e)
3411           .Case("spsr_irq", 0x30)
3412           .Case("spsr_svc", 0x32)
3413           .Case("spsr_abt", 0x34)
3414           .Case("spsr_und", 0x36)
3415           .Case("spsr_mon", 0x3c)
3416           .Case("spsr_hyp", 0x3e)
3417           .Default(-1);
3418 }
3419
3420 // Maps a MClass special register string to its value for use in the
3421 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3422 // Returns -1 to signify that the string was invalid.
3423 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3424   return StringSwitch<int>(RegString.lower())
3425           .Case("apsr", 0x0)
3426           .Case("iapsr", 0x1)
3427           .Case("eapsr", 0x2)
3428           .Case("xpsr", 0x3)
3429           .Case("ipsr", 0x5)
3430           .Case("epsr", 0x6)
3431           .Case("iepsr", 0x7)
3432           .Case("msp", 0x8)
3433           .Case("psp", 0x9)
3434           .Case("primask", 0x10)
3435           .Case("basepri", 0x11)
3436           .Case("basepri_max", 0x12)
3437           .Case("faultmask", 0x13)
3438           .Case("control", 0x14)
3439           .Default(-1);
3440 }
3441
3442 // The flags here are common to those allowed for apsr in the A class cores and
3443 // those allowed for the special registers in the M class cores. Returns a
3444 // value representing which flags were present, -1 if invalid.
3445 static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
3446   if (Flags.empty())
3447     return 0x2 | (int)hasDSP;
3448
3449   return StringSwitch<int>(Flags)
3450           .Case("g", 0x1)
3451           .Case("nzcvq", 0x2)
3452           .Case("nzcvqg", 0x3)
3453           .Default(-1);
3454 }
3455
3456 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
3457                                  const ARMSubtarget *Subtarget) {
3458   // Ensure that the register (without flags) was a valid M Class special
3459   // register.
3460   int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
3461   if (SYSmvalue == -1)
3462     return -1;
3463
3464   // basepri, basepri_max and faultmask are only valid for V7m.
3465   if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
3466     return -1;
3467
3468   // If it was a read then we won't be expecting flags and so at this point
3469   // we can return the mask.
3470   if (IsRead) {
3471     assert (Flags.empty() && "Unexpected flags for reading M class register.");
3472     return SYSmvalue;
3473   }
3474
3475   // We know we are now handling a write so need to get the mask for the flags.
3476   int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
3477
3478   // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
3479   // shouldn't have flags present.
3480   if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
3481     return -1;
3482
3483   // The _g and _nzcvqg versions are only valid if the DSP extension is
3484   // available.
3485   if (!Subtarget->hasDSP() && (Mask & 0x1))
3486     return -1;
3487
3488   // The register was valid so need to put the mask in the correct place
3489   // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
3490   // construct the operand for the instruction node.
3491   if (SYSmvalue < 0x4)
3492     return SYSmvalue | Mask << 10;
3493
3494   return SYSmvalue;
3495 }
3496
3497 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3498   // The mask operand contains the special register (R Bit) in bit 4, whether
3499   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3500   // bits 3-0 contains the fields to be accessed in the special register, set by
3501   // the flags provided with the register.
3502   int Mask = 0;
3503   if (Reg == "apsr") {
3504     // The flags permitted for apsr are the same flags that are allowed in
3505     // M class registers. We get the flag value and then shift the flags into
3506     // the correct place to combine with the mask.
3507     Mask = getMClassFlagsMask(Flags, true);
3508     if (Mask == -1)
3509       return -1;
3510     return Mask << 2;
3511   }
3512
3513   if (Reg != "cpsr" && Reg != "spsr") {
3514     return -1;
3515   }
3516
3517   // This is the same as if the flags were "fc"
3518   if (Flags.empty() || Flags == "all")
3519     return Mask | 0x9;
3520
3521   // Inspect the supplied flags string and set the bits in the mask for
3522   // the relevant and valid flags allowed for cpsr and spsr.
3523   for (char Flag : Flags) {
3524     int FlagVal;
3525     switch (Flag) {
3526       case 'c':
3527         FlagVal = 0x1;
3528         break;
3529       case 'x':
3530         FlagVal = 0x2;
3531         break;
3532       case 's':
3533         FlagVal = 0x4;
3534         break;
3535       case 'f':
3536         FlagVal = 0x8;
3537         break;
3538       default:
3539         FlagVal = 0;
3540     }
3541
3542     // This avoids allowing strings where the same flag bit appears twice.
3543     if (!FlagVal || (Mask & FlagVal))
3544       return -1;
3545     Mask |= FlagVal;
3546   }
3547
3548   // If the register is spsr then we need to set the R bit.
3549   if (Reg == "spsr")
3550     Mask |= 0x10;
3551
3552   return Mask;
3553 }
3554
3555 // Lower the read_register intrinsic to ARM specific DAG nodes
3556 // using the supplied metadata string to select the instruction node to use
3557 // and the registers/masks to construct as operands for the node.
3558 SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){
3559   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3560   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3561   bool IsThumb2 = Subtarget->isThumb2();
3562   SDLoc DL(N);
3563
3564   std::vector<SDValue> Ops;
3565   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3566
3567   if (!Ops.empty()) {
3568     // If the special register string was constructed of fields (as defined
3569     // in the ACLE) then need to lower to MRC node (32 bit) or
3570     // MRRC node(64 bit), we can make the distinction based on the number of
3571     // operands we have.
3572     unsigned Opcode;
3573     SmallVector<EVT, 3> ResTypes;
3574     if (Ops.size() == 5){
3575       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
3576       ResTypes.append({ MVT::i32, MVT::Other });
3577     } else {
3578       assert(Ops.size() == 3 &&
3579               "Invalid number of fields in special register string.");
3580       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
3581       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
3582     }
3583
3584     Ops.push_back(getAL(CurDAG, DL));
3585     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3586     Ops.push_back(N->getOperand(0));
3587     return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops);
3588   }
3589
3590   std::string SpecialReg = RegString->getString().lower();
3591
3592   int BankedReg = getBankedRegisterMask(SpecialReg);
3593   if (BankedReg != -1) {
3594     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
3595             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3596             N->getOperand(0) };
3597     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
3598                                   DL, MVT::i32, MVT::Other, Ops);
3599   }
3600
3601   // The VFP registers are read by creating SelectionDAG nodes with opcodes
3602   // corresponding to the register that is being read from. So we switch on the
3603   // string to find which opcode we need to use.
3604   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
3605                     .Case("fpscr", ARM::VMRS)
3606                     .Case("fpexc", ARM::VMRS_FPEXC)
3607                     .Case("fpsid", ARM::VMRS_FPSID)
3608                     .Case("mvfr0", ARM::VMRS_MVFR0)
3609                     .Case("mvfr1", ARM::VMRS_MVFR1)
3610                     .Case("mvfr2", ARM::VMRS_MVFR2)
3611                     .Case("fpinst", ARM::VMRS_FPINST)
3612                     .Case("fpinst2", ARM::VMRS_FPINST2)
3613                     .Default(0);
3614
3615   // If an opcode was found then we can lower the read to a VFP instruction.
3616   if (Opcode) {
3617     if (!Subtarget->hasVFP2())
3618       return nullptr;
3619     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
3620       return nullptr;
3621
3622     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3623             N->getOperand(0) };
3624     return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops);
3625   }
3626
3627   // If the target is M Class then need to validate that the register string
3628   // is an acceptable value, so check that a mask can be constructed from the
3629   // string.
3630   if (Subtarget->isMClass()) {
3631     int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget);
3632     if (SYSmValue == -1)
3633       return nullptr;
3634
3635     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
3636                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3637                       N->getOperand(0) };
3638     return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops);
3639   }
3640
3641   // Here we know the target is not M Class so we need to check if it is one
3642   // of the remaining possible values which are apsr, cpsr or spsr.
3643   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
3644     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3645             N->getOperand(0) };
3646     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL,
3647                                   MVT::i32, MVT::Other, Ops);
3648   }
3649
3650   if (SpecialReg == "spsr") {
3651     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3652             N->getOperand(0) };
3653     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
3654                                   DL, MVT::i32, MVT::Other, Ops);
3655   }
3656
3657   return nullptr;
3658 }
3659
3660 // Lower the write_register intrinsic to ARM specific DAG nodes
3661 // using the supplied metadata string to select the instruction node to use
3662 // and the registers/masks to use in the nodes
3663 SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
3664   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3665   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3666   bool IsThumb2 = Subtarget->isThumb2();
3667   SDLoc DL(N);
3668
3669   std::vector<SDValue> Ops;
3670   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3671
3672   if (!Ops.empty()) {
3673     // If the special register string was constructed of fields (as defined
3674     // in the ACLE) then need to lower to MCR node (32 bit) or
3675     // MCRR node(64 bit), we can make the distinction based on the number of
3676     // operands we have.
3677     unsigned Opcode;
3678     if (Ops.size() == 5) {
3679       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
3680       Ops.insert(Ops.begin()+2, N->getOperand(2));
3681     } else {
3682       assert(Ops.size() == 3 &&
3683               "Invalid number of fields in special register string.");
3684       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
3685       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
3686       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
3687     }
3688
3689     Ops.push_back(getAL(CurDAG, DL));
3690     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3691     Ops.push_back(N->getOperand(0));
3692
3693     return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
3694   }
3695
3696   std::string SpecialReg = RegString->getString().lower();
3697   int BankedReg = getBankedRegisterMask(SpecialReg);
3698   if (BankedReg != -1) {
3699     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
3700             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3701             N->getOperand(0) };
3702     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
3703                                   DL, MVT::Other, Ops);
3704   }
3705
3706   // The VFP registers are written to by creating SelectionDAG nodes with
3707   // opcodes corresponding to the register that is being written. So we switch
3708   // on the string to find which opcode we need to use.
3709   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
3710                     .Case("fpscr", ARM::VMSR)
3711                     .Case("fpexc", ARM::VMSR_FPEXC)
3712                     .Case("fpsid", ARM::VMSR_FPSID)
3713                     .Case("fpinst", ARM::VMSR_FPINST)
3714                     .Case("fpinst2", ARM::VMSR_FPINST2)
3715                     .Default(0);
3716
3717   if (Opcode) {
3718     if (!Subtarget->hasVFP2())
3719       return nullptr;
3720     Ops = { N->getOperand(2), getAL(CurDAG, DL),
3721             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
3722     return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
3723   }
3724
3725   SmallVector<StringRef, 5> Fields;
3726   StringRef(SpecialReg).split(Fields, '_', 1, false);
3727   std::string Reg = Fields[0].str();
3728   StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
3729
3730   // If the target was M Class then need to validate the special register value
3731   // and retrieve the mask for use in the instruction node.
3732   if (Subtarget->isMClass()) {
3733     // basepri_max gets split so need to correct Reg and Flags.
3734     if (SpecialReg == "basepri_max") {
3735       Reg = SpecialReg;
3736       Flags = "";
3737     }
3738     int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
3739     if (SYSmValue == -1)
3740       return nullptr;
3741
3742     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
3743                       N->getOperand(2), getAL(CurDAG, DL),
3744                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
3745     return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops);
3746   }
3747
3748   // We then check to see if a valid mask can be constructed for one of the
3749   // register string values permitted for the A and R class cores. These values
3750   // are apsr, spsr and cpsr; these are also valid on older cores.
3751   int Mask = getARClassRegisterMask(Reg, Flags);
3752   if (Mask != -1) {
3753     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
3754             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3755             N->getOperand(0) };
3756     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
3757                                   DL, MVT::Other, Ops);
3758   }
3759
3760   return nullptr;
3761 }
3762
3763 SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
3764   std::vector<SDValue> AsmNodeOperands;
3765   unsigned Flag, Kind;
3766   bool Changed = false;
3767   unsigned NumOps = N->getNumOperands();
3768
3769   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
3770   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
3771   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
3772   // respectively. Since there is no constraint to explicitly specify a
3773   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
3774   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
3775   // them into a GPRPair.
3776
3777   SDLoc dl(N);
3778   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
3779                                    : SDValue(nullptr,0);
3780
3781   SmallVector<bool, 8> OpChanged;
3782   // Glue node will be appended late.
3783   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
3784     SDValue op = N->getOperand(i);
3785     AsmNodeOperands.push_back(op);
3786
3787     if (i < InlineAsm::Op_FirstOperand)
3788       continue;
3789
3790     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
3791       Flag = C->getZExtValue();
3792       Kind = InlineAsm::getKind(Flag);
3793     }
3794     else
3795       continue;
3796
3797     // Immediate operands to inline asm in the SelectionDAG are modeled with
3798     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
3799     // the second is a constant with the value of the immediate. If we get here
3800     // and we have a Kind_Imm, skip the next operand, and continue.
3801     if (Kind == InlineAsm::Kind_Imm) {
3802       SDValue op = N->getOperand(++i);
3803       AsmNodeOperands.push_back(op);
3804       continue;
3805     }
3806
3807     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
3808     if (NumRegs)
3809       OpChanged.push_back(false);
3810
3811     unsigned DefIdx = 0;
3812     bool IsTiedToChangedOp = false;
3813     // If it's a use that is tied with a previous def, it has no
3814     // reg class constraint.
3815     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
3816       IsTiedToChangedOp = OpChanged[DefIdx];
3817
3818     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
3819         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
3820       continue;
3821
3822     unsigned RC;
3823     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
3824     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
3825         || NumRegs != 2)
3826       continue;
3827
3828     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
3829     SDValue V0 = N->getOperand(i+1);
3830     SDValue V1 = N->getOperand(i+2);
3831     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
3832     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
3833     SDValue PairedReg;
3834     MachineRegisterInfo &MRI = MF->getRegInfo();
3835
3836     if (Kind == InlineAsm::Kind_RegDef ||
3837         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
3838       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
3839       // the original GPRs.
3840
3841       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
3842       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
3843       SDValue Chain = SDValue(N,0);
3844
3845       SDNode *GU = N->getGluedUser();
3846       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
3847                                                Chain.getValue(1));
3848
3849       // Extract values from a GPRPair reg and copy to the original GPR reg.
3850       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
3851                                                     RegCopy);
3852       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
3853                                                     RegCopy);
3854       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
3855                                         RegCopy.getValue(1));
3856       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
3857
3858       // Update the original glue user.
3859       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
3860       Ops.push_back(T1.getValue(1));
3861       CurDAG->UpdateNodeOperands(GU, Ops);
3862     }
3863     else {
3864       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
3865       // GPRPair and then pass the GPRPair to the inline asm.
3866       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
3867
3868       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
3869       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
3870                                           Chain.getValue(1));
3871       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
3872                                           T0.getValue(1));
3873       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
3874
3875       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
3876       // i32 VRs of inline asm with it.
3877       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
3878       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
3879       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
3880
3881       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
3882       Glue = Chain.getValue(1);
3883     }
3884
3885     Changed = true;
3886
3887     if(PairedReg.getNode()) {
3888       OpChanged[OpChanged.size() -1 ] = true;
3889       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
3890       if (IsTiedToChangedOp)
3891         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
3892       else
3893         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
3894       // Replace the current flag.
3895       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
3896           Flag, dl, MVT::i32);
3897       // Add the new register node and skip the original two GPRs.
3898       AsmNodeOperands.push_back(PairedReg);
3899       // Skip the next two GPRs.
3900       i += 2;
3901     }
3902   }
3903
3904   if (Glue.getNode())
3905     AsmNodeOperands.push_back(Glue);
3906   if (!Changed)
3907     return nullptr;
3908
3909   SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
3910       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
3911   New->setNodeId(-1);
3912   return New.getNode();
3913 }
3914
3915
3916 bool ARMDAGToDAGISel::
3917 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
3918                              std::vector<SDValue> &OutOps) {
3919   switch(ConstraintID) {
3920   default:
3921     llvm_unreachable("Unexpected asm memory constraint");
3922   case InlineAsm::Constraint_i:
3923     // FIXME: It seems strange that 'i' is needed here since it's supposed to
3924     //        be an immediate and not a memory constraint.
3925     // Fallthrough.
3926   case InlineAsm::Constraint_m:
3927   case InlineAsm::Constraint_Q:
3928   case InlineAsm::Constraint_Um:
3929   case InlineAsm::Constraint_Un:
3930   case InlineAsm::Constraint_Uq:
3931   case InlineAsm::Constraint_Us:
3932   case InlineAsm::Constraint_Ut:
3933   case InlineAsm::Constraint_Uv:
3934   case InlineAsm::Constraint_Uy:
3935     // Require the address to be in a register.  That is safe for all ARM
3936     // variants and it is hard to do anything much smarter without knowing
3937     // how the operand is used.
3938     OutOps.push_back(Op);
3939     return false;
3940   }
3941   return true;
3942 }
3943
3944 /// createARMISelDag - This pass converts a legalized DAG into a
3945 /// ARM-specific DAG, ready for instruction scheduling.
3946 ///
3947 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
3948                                      CodeGenOpt::Level OptLevel) {
3949   return new ARMDAGToDAGISel(TM, OptLevel);
3950 }