lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines an instruction selector for the ARM target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "ARM.h"
  15 #include "ARMBaseInstrInfo.h"
  16 #include "ARMTargetMachine.h"
  17 #include "MCTargetDesc/ARMAddressingModes.h"
  18 #include "llvm/ADT/StringSwitch.h"
  19 #include "llvm/CodeGen/MachineFrameInfo.h"
  20 #include "llvm/CodeGen/MachineFunction.h"
  21 #include "llvm/CodeGen/MachineInstrBuilder.h"
  22 #include "llvm/CodeGen/MachineRegisterInfo.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGISel.h"
  25 #include "llvm/IR/CallingConv.h"
  26 #include "llvm/IR/Constants.h"
  27 #include "llvm/IR/DerivedTypes.h"
  28 #include "llvm/IR/Function.h"
  29 #include "llvm/IR/Intrinsics.h"
  30 #include "llvm/IR/LLVMContext.h"
  31 #include "llvm/Support/CommandLine.h"
  32 #include "llvm/Support/Compiler.h"
  33 #include "llvm/Support/Debug.h"
  34 #include "llvm/Support/ErrorHandling.h"
  35 #include "llvm/Target/TargetLowering.h"
  36 #include "llvm/Target/TargetOptions.h"
  37
  38 using namespace llvm;
  39
  40 #define DEBUG_TYPE "arm-isel"
  41
  42 static cl::opt<bool>
  43 DisableShifterOp("disable-shifter-op", cl::Hidden,
  44   cl::desc("Disable isel of shifter-op"),
  45   cl::init(false));
  46
  47 static cl::opt<bool>
  48 CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
  49   cl::desc("Check fp vmla / vmls hazard at isel time"),
  50   cl::init(true));
  51
  52 //===--------------------------------------------------------------------===//
  53 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  54 /// instructions for SelectionDAG operations.
  55 ///
  56 namespace {
  57
  58 enum AddrMode2Type {
  59   AM2_BASE, // Simple AM2 (+-imm12)
  60   AM2_SHOP  // Shifter-op AM2
  61 };
  62
  63 class ARMDAGToDAGISel : public SelectionDAGISel {
  64   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  65   /// make the right decision when generating code for different targets.
  66   const ARMSubtarget *Subtarget;
  67
  68 public:
  69   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
  70       : SelectionDAGISel(tm, OptLevel) {}
  71
  72   bool runOnMachineFunction(MachineFunction &MF) override {
  73     // Reset the subtarget each time through.
  74     Subtarget = &MF.getSubtarget<ARMSubtarget>();
  75     SelectionDAGISel::runOnMachineFunction(MF);
  76     return true;
  77   }
  78
  79   const char *getPassName() const override {
  80     return "ARM Instruction Selection";
  81   }
  82
  83   void PreprocessISelDAG() override;
  84
  85   /// getI32Imm - Return a target constant of type i32 with the specified
  86   /// value.
  87   inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
  88     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  89   }
  90
  91   SDNode *Select(SDNode *N) override;
  92
  93
  94   bool hasNoVMLxHazardUse(SDNode *N) const;
  95   bool isShifterOpProfitable(const SDValue &Shift,
  96                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  97   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  98                                SDValue &B, SDValue &C,
  99                                bool CheckProfitability = true);
 100   bool SelectImmShifterOperand(SDValue N, SDValue &A,
 101                                SDValue &B, bool CheckProfitability = true);
 102   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
 103                                     SDValue &B, SDValue &C) {
 104     // Don't apply the profitability check
 105     return SelectRegShifterOperand(N, A, B, C, false);
 106   }
 107   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
 108                                     SDValue &B) {
 109     // Don't apply the profitability check
 110     return SelectImmShifterOperand(N, A, B, false);
 111   }
 112
 113   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 114   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 115
 116   AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
 117                                       SDValue &Offset, SDValue &Opc);
 118   bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
 119                            SDValue &Opc) {
 120     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
 121   }
 122
 123   bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
 124                            SDValue &Opc) {
 125     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
 126   }
 127
 128   bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
 129                        SDValue &Opc) {
 130     SelectAddrMode2Worker(N, Base, Offset, Opc);
 131 //    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
 132     // This always matches one way or another.
 133     return true;
 134   }
 135
 136   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
 137     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
 138     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
 139     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
 140     return true;
 141   }
 142
 143   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 144                              SDValue &Offset, SDValue &Opc);
 145   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 146                              SDValue &Offset, SDValue &Opc);
 147   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 148                              SDValue &Offset, SDValue &Opc);
 149   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 150   bool SelectAddrMode3(SDValue N, SDValue &Base,
 151                        SDValue &Offset, SDValue &Opc);
 152   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 153                              SDValue &Offset, SDValue &Opc);
 154   bool SelectAddrMode5(SDValue N, SDValue &Base,
 155                        SDValue &Offset);
 156   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 157   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 158
 159   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 160
 161   // Thumb Addressing Modes:
 162   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 163   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 164                                 SDValue &OffImm);
 165   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 166                                  SDValue &OffImm);
 167   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 168                                  SDValue &OffImm);
 169   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 170                                  SDValue &OffImm);
 171   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 172
 173   // Thumb 2 Addressing Modes:
 174   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 175   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 176                             SDValue &OffImm);
 177   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 178                                  SDValue &OffImm);
 179   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 180                              SDValue &OffReg, SDValue &ShImm);
 181   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 182
 183   inline bool is_so_imm(unsigned Imm) const {
 184     return ARM_AM::getSOImmVal(Imm) != -1;
 185   }
 186
 187   inline bool is_so_imm_not(unsigned Imm) const {
 188     return ARM_AM::getSOImmVal(~Imm) != -1;
 189   }
 190
 191   inline bool is_t2_so_imm(unsigned Imm) const {
 192     return ARM_AM::getT2SOImmVal(Imm) != -1;
 193   }
 194
 195   inline bool is_t2_so_imm_not(unsigned Imm) const {
 196     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 197   }
 198
 199   // Include the pieces autogenerated from the target description.
 200 #include "ARMGenDAGISel.inc"
 201
 202 private:
 203   /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
 204   /// ARM.
 205   SDNode *SelectARMIndexedLoad(SDNode *N);
 206   SDNode *SelectT2IndexedLoad(SDNode *N);
 207
 208   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 209   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 210   /// loads of D registers and even subregs and odd subregs of Q registers.
 211   /// For NumVecs <= 2, QOpcodes1 is not used.
 212   SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 213                     const uint16_t *DOpcodes,
 214                     const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
 215
 216   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 217   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 218   /// stores of D registers and even subregs and odd subregs of Q registers.
 219   /// For NumVecs <= 2, QOpcodes1 is not used.
 220   SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 221                     const uint16_t *DOpcodes,
 222                     const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
 223
 224   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 225   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 226   /// load/store of D registers and Q registers.
 227   SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
 228                           bool isUpdating, unsigned NumVecs,
 229                           const uint16_t *DOpcodes, const uint16_t *QOpcodes);
 230
 231   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 232   /// should be 2, 3 or 4.  The opcode array specifies the instructions used
 233   /// for loading D registers.  (Q registers are not supported.)
 234   SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
 235                        const uint16_t *Opcodes);
 236
 237   /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
 238   /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
 239   /// generated to force the table registers to be consecutive.
 240   SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
 241
 242   /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
 243   SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 244
 245   // Select special operations if node forms integer ABS pattern
 246   SDNode *SelectABSOp(SDNode *N);
 247
 248   SDNode *SelectReadRegister(SDNode *N);
 249   SDNode *SelectWriteRegister(SDNode *N);
 250
 251   SDNode *SelectInlineAsm(SDNode *N);
 252
 253   SDNode *SelectConcatVector(SDNode *N);
 254
 255   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 256   /// inline asm expressions.
 257   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
 258                                     std::vector<SDValue> &OutOps) override;
 259
 260   // Form pairs of consecutive R, S, D, or Q registers.
 261   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
 262   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
 263   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
 264   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
 265
 266   // Form sequences of 4 consecutive S, D, or Q registers.
 267   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 268   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 269   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 270
 271   // Get the alignment operand for a NEON VLD or VST instruction.
 272   SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
 273                         bool is64BitVector);
 274
 275   /// Returns the number of instructions required to materialize the given
 276   /// constant in a register, or 3 if a literal pool load is needed.
 277   unsigned ConstantMaterializationCost(unsigned Val) const;
 278
 279   /// Checks if N is a multiplication by a constant where we can extract out a
 280   /// power of two from the constant so that it can be used in a shift, but only
 281   /// if it simplifies the materialization of the constant. Returns true if it
 282   /// is, and assigns to PowerOfTwo the power of two that should be extracted
 283   /// out and to NewMulConst the new constant to be multiplied by.
 284   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
 285                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
 286
 287   /// Replace N with M in CurDAG, in a way that also ensures that M gets
 288   /// selected when N would have been selected.
 289   void replaceDAGValue(const SDValue &N, SDValue M);
 290 };
 291 }
 292
 293 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 294 /// operand. If so Imm will receive the 32-bit value.
 295 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 296   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 297     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 298     return true;
 299   }
 300   return false;
 301 }
 302
 303 // isInt32Immediate - This method tests to see if a constant operand.
 304 // If so Imm will receive the 32 bit value.
 305 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 306   return isInt32Immediate(N.getNode(), Imm);
 307 }
 308
 309 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 310 // opcode and that it has a immediate integer right operand.
 311 // If so Imm will receive the 32 bit value.
 312 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 313   return N->getOpcode() == Opc &&
 314          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 315 }
 316
 317 /// \brief Check whether a particular node is a constant value representable as
 318 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
 319 ///
 320 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 321 static bool isScaledConstantInRange(SDValue Node, int Scale,
 322                                     int RangeMin, int RangeMax,
 323                                     int &ScaledConstant) {
 324   assert(Scale > 0 && "Invalid scale!");
 325
 326   // Check that this is a constant.
 327   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 328   if (!C)
 329     return false;
 330
 331   ScaledConstant = (int) C->getZExtValue();
 332   if ((ScaledConstant % Scale) != 0)
 333     return false;
 334
 335   ScaledConstant /= Scale;
 336   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 337 }
 338
 339 void ARMDAGToDAGISel::PreprocessISelDAG() {
 340   if (!Subtarget->hasV6T2Ops())
 341     return;
 342
 343   bool isThumb2 = Subtarget->isThumb();
 344   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
 345        E = CurDAG->allnodes_end(); I != E; ) {
 346     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 347
 348     if (N->getOpcode() != ISD::ADD)
 349       continue;
 350
 351     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
 352     // leading zeros, followed by consecutive set bits, followed by 1 or 2
 353     // trailing zeros, e.g. 1020.
 354     // Transform the expression to
 355     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
 356     // of trailing zeros of c2. The left shift would be folded as an shifter
 357     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
 358     // node (UBFX).
 359
 360     SDValue N0 = N->getOperand(0);
 361     SDValue N1 = N->getOperand(1);
 362     unsigned And_imm = 0;
 363     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
 364       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
 365         std::swap(N0, N1);
 366     }
 367     if (!And_imm)
 368       continue;
 369
 370     // Check if the AND mask is an immediate of the form: 000.....1111111100
 371     unsigned TZ = countTrailingZeros(And_imm);
 372     if (TZ != 1 && TZ != 2)
 373       // Be conservative here. Shifter operands aren't always free. e.g. On
 374       // Swift, left shifter operand of 1 / 2 for free but others are not.
 375       // e.g.
 376       //  ubfx   r3, r1, #16, #8
 377       //  ldr.w  r3, [r0, r3, lsl #2]
 378       // vs.
 379       //  mov.w  r9, #1020
 380       //  and.w  r2, r9, r1, lsr #14
 381       //  ldr    r2, [r0, r2]
 382       continue;
 383     And_imm >>= TZ;
 384     if (And_imm & (And_imm + 1))
 385       continue;
 386
 387     // Look for (and (srl X, c1), c2).
 388     SDValue Srl = N1.getOperand(0);
 389     unsigned Srl_imm = 0;
 390     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
 391         (Srl_imm <= 2))
 392       continue;
 393
 394     // Make sure first operand is not a shifter operand which would prevent
 395     // folding of the left shift.
 396     SDValue CPTmp0;
 397     SDValue CPTmp1;
 398     SDValue CPTmp2;
 399     if (isThumb2) {
 400       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
 401         continue;
 402     } else {
 403       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
 404           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
 405         continue;
 406     }
 407
 408     // Now make the transformation.
 409     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
 410                           Srl.getOperand(0),
 411                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
 412                                               MVT::i32));
 413     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
 414                          Srl,
 415                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
 416     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
 417                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
 418     CurDAG->UpdateNodeOperands(N, N0, N1);
 419   }
 420 }
 421
 422 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 423 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 424 /// least on current ARM implementations) which should be avoidded.
 425 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 426   if (OptLevel == CodeGenOpt::None)
 427     return true;
 428
 429   if (!CheckVMLxHazard)
 430     return true;
 431
 432   if (!Subtarget->isCortexA7() && !Subtarget->isCortexA8() &&
 433       !Subtarget->isCortexA9() && !Subtarget->isSwift())
 434     return true;
 435
 436   if (!N->hasOneUse())
 437     return false;
 438
 439   SDNode *Use = *N->use_begin();
 440   if (Use->getOpcode() == ISD::CopyToReg)
 441     return true;
 442   if (Use->isMachineOpcode()) {
 443     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
 444         CurDAG->getSubtarget().getInstrInfo());
 445
 446     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 447     if (MCID.mayStore())
 448       return true;
 449     unsigned Opcode = MCID.getOpcode();
 450     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 451       return true;
 452     // vmlx feeding into another vmlx. We actually want to unfold
 453     // the use later in the MLxExpansion pass. e.g.
 454     // vmla
 455     // vmla (stall 8 cycles)
 456     //
 457     // vmul (5 cycles)
 458     // vadd (5 cycles)
 459     // vmla
 460     // This adds up to about 18 - 19 cycles.
 461     //
 462     // vmla
 463     // vmul (stall 4 cycles)
 464     // vadd adds up to about 14 cycles.
 465     return TII->isFpMLxInstruction(Opcode);
 466   }
 467
 468   return false;
 469 }
 470
 471 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 472                                             ARM_AM::ShiftOpc ShOpcVal,
 473                                             unsigned ShAmt) {
 474   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
 475     return true;
 476   if (Shift.hasOneUse())
 477     return true;
 478   // R << 2 is free.
 479   return ShOpcVal == ARM_AM::lsl &&
 480          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 481 }
 482
 483 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
 484   if (Subtarget->isThumb()) {
 485     if (Val <= 255) return 1;                               // MOV
 486     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 487     if (~Val <= 255) return 2;                              // MOV + MVN
 488     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
 489   } else {
 490     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
 491     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
 492     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
 493     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
 494   }
 495   if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
 496   return 3; // Literal pool load
 497 }
 498
 499 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
 500                                              unsigned MaxShift,
 501                                              unsigned &PowerOfTwo,
 502                                              SDValue &NewMulConst) const {
 503   assert(N.getOpcode() == ISD::MUL);
 504   assert(MaxShift > 0);
 505
 506   // If the multiply is used in more than one place then changing the constant
 507   // will make other uses incorrect, so don't.
 508   if (!N.hasOneUse()) return false;
 509   // Check if the multiply is by a constant
 510   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
 511   if (!MulConst) return false;
 512   // If the constant is used in more than one place then modifying it will mean
 513   // we need to materialize two constants instead of one, which is a bad idea.
 514   if (!MulConst->hasOneUse()) return false;
 515   unsigned MulConstVal = MulConst->getZExtValue();
 516   if (MulConstVal == 0) return false;
 517
 518   // Find the largest power of 2 that MulConstVal is a multiple of
 519   PowerOfTwo = MaxShift;
 520   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
 521     --PowerOfTwo;
 522     if (PowerOfTwo == 0) return false;
 523   }
 524
 525   // Only optimise if the new cost is better
 526   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
 527   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
 528   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
 529   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
 530   return NewCost < OldCost;
 531 }
 532
 533 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
 534   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
 535   CurDAG->ReplaceAllUsesWith(N, M);
 536 }
 537
 538 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 539                                               SDValue &BaseReg,
 540                                               SDValue &Opc,
 541                                               bool CheckProfitability) {
 542   if (DisableShifterOp)
 543     return false;
 544
 545   // If N is a multiply-by-constant and it's profitable to extract a shift and
 546   // use it in a shifted operand do so.
 547   if (N.getOpcode() == ISD::MUL) {
 548     unsigned PowerOfTwo = 0;
 549     SDValue NewMulConst;
 550     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
 551       BaseReg = SDValue(Select(CurDAG->getNode(ISD::MUL, SDLoc(N), MVT::i32,
 552                                                N.getOperand(0), NewMulConst)
 553                                    .getNode()),
 554                         0);
 555       replaceDAGValue(N.getOperand(1), NewMulConst);
 556       Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
 557                                                           PowerOfTwo),
 558                                       SDLoc(N), MVT::i32);
 559       return true;
 560     }
 561   }
 562
 563   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 564
 565   // Don't match base register only case. That is matched to a separate
 566   // lower complexity pattern with explicit register operand.
 567   if (ShOpcVal == ARM_AM::no_shift) return false;
 568
 569   BaseReg = N.getOperand(0);
 570   unsigned ShImmVal = 0;
 571   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 572   if (!RHS) return false;
 573   ShImmVal = RHS->getZExtValue() & 31;
 574   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 575                                   SDLoc(N), MVT::i32);
 576   return true;
 577 }
 578
 579 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 580                                               SDValue &BaseReg,
 581                                               SDValue &ShReg,
 582                                               SDValue &Opc,
 583                                               bool CheckProfitability) {
 584   if (DisableShifterOp)
 585     return false;
 586
 587   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 588
 589   // Don't match base register only case. That is matched to a separate
 590   // lower complexity pattern with explicit register operand.
 591   if (ShOpcVal == ARM_AM::no_shift) return false;
 592
 593   BaseReg = N.getOperand(0);
 594   unsigned ShImmVal = 0;
 595   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 596   if (RHS) return false;
 597
 598   ShReg = N.getOperand(1);
 599   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 600     return false;
 601   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 602                                   SDLoc(N), MVT::i32);
 603   return true;
 604 }
 605
 606
 607 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 608                                           SDValue &Base,
 609                                           SDValue &OffImm) {
 610   // Match simple R + imm12 operands.
 611
 612   // Base only.
 613   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 614       !CurDAG->isBaseWithConstantOffset(N)) {
 615     if (N.getOpcode() == ISD::FrameIndex) {
 616       // Match frame index.
 617       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 618       Base = CurDAG->getTargetFrameIndex(
 619           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 620       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 621       return true;
 622     }
 623
 624     if (N.getOpcode() == ARMISD::Wrapper &&
 625         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 626         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 627       Base = N.getOperand(0);
 628     } else
 629       Base = N;
 630     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 631     return true;
 632   }
 633
 634   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 635     int RHSC = (int)RHS->getSExtValue();
 636     if (N.getOpcode() == ISD::SUB)
 637       RHSC = -RHSC;
 638
 639     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
 640       Base   = N.getOperand(0);
 641       if (Base.getOpcode() == ISD::FrameIndex) {
 642         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 643         Base = CurDAG->getTargetFrameIndex(
 644             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 645       }
 646       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
 647       return true;
 648     }
 649   }
 650
 651   // Base only.
 652   Base = N;
 653   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
 654   return true;
 655 }
 656
 657
 658
 659 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 660                                       SDValue &Opc) {
 661   if (N.getOpcode() == ISD::MUL &&
 662       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
 663     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 664       // X * [3,5,9] -> X + X * [2,4,8] etc.
 665       int RHSC = (int)RHS->getZExtValue();
 666       if (RHSC & 1) {
 667         RHSC = RHSC & ~1;
 668         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 669         if (RHSC < 0) {
 670           AddSub = ARM_AM::sub;
 671           RHSC = - RHSC;
 672         }
 673         if (isPowerOf2_32(RHSC)) {
 674           unsigned ShAmt = Log2_32(RHSC);
 675           Base = Offset = N.getOperand(0);
 676           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 677                                                             ARM_AM::lsl),
 678                                           SDLoc(N), MVT::i32);
 679           return true;
 680         }
 681       }
 682     }
 683   }
 684
 685   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 686       // ISD::OR that is equivalent to an ISD::ADD.
 687       !CurDAG->isBaseWithConstantOffset(N))
 688     return false;
 689
 690   // Leave simple R +/- imm12 operands for LDRi12
 691   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 692     int RHSC;
 693     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 694                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 695       return false;
 696   }
 697
 698   // Otherwise this is R +/- [possibly shifted] R.
 699   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 700   ARM_AM::ShiftOpc ShOpcVal =
 701     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 702   unsigned ShAmt = 0;
 703
 704   Base   = N.getOperand(0);
 705   Offset = N.getOperand(1);
 706
 707   if (ShOpcVal != ARM_AM::no_shift) {
 708     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 709     // it.
 710     if (ConstantSDNode *Sh =
 711            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 712       ShAmt = Sh->getZExtValue();
 713       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 714         Offset = N.getOperand(1).getOperand(0);
 715       else {
 716         ShAmt = 0;
 717         ShOpcVal = ARM_AM::no_shift;
 718       }
 719     } else {
 720       ShOpcVal = ARM_AM::no_shift;
 721     }
 722   }
 723
 724   // Try matching (R shl C) + (R).
 725   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 726       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 727         N.getOperand(0).hasOneUse())) {
 728     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 729     if (ShOpcVal != ARM_AM::no_shift) {
 730       // Check to see if the RHS of the shift is a constant, if not, we can't
 731       // fold it.
 732       if (ConstantSDNode *Sh =
 733           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 734         ShAmt = Sh->getZExtValue();
 735         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 736           Offset = N.getOperand(0).getOperand(0);
 737           Base = N.getOperand(1);
 738         } else {
 739           ShAmt = 0;
 740           ShOpcVal = ARM_AM::no_shift;
 741         }
 742       } else {
 743         ShOpcVal = ARM_AM::no_shift;
 744       }
 745     }
 746   }
 747
 748   // If Offset is a multiply-by-constant and it's profitable to extract a shift
 749   // and use it in a shifted operand do so.
 750   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
 751     unsigned PowerOfTwo = 0;
 752     SDValue NewMulConst;
 753     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
 754       replaceDAGValue(Offset.getOperand(1), NewMulConst);
 755       ShAmt = PowerOfTwo;
 756       ShOpcVal = ARM_AM::lsl;
 757     }
 758   }
 759
 760   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 761                                   SDLoc(N), MVT::i32);
 762   return true;
 763 }
 764
 765
 766 //-----
 767
 768 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
 769                                                      SDValue &Base,
 770                                                      SDValue &Offset,
 771                                                      SDValue &Opc) {
 772   if (N.getOpcode() == ISD::MUL &&
 773       (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
 774     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 775       // X * [3,5,9] -> X + X * [2,4,8] etc.
 776       int RHSC = (int)RHS->getZExtValue();
 777       if (RHSC & 1) {
 778         RHSC = RHSC & ~1;
 779         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 780         if (RHSC < 0) {
 781           AddSub = ARM_AM::sub;
 782           RHSC = - RHSC;
 783         }
 784         if (isPowerOf2_32(RHSC)) {
 785           unsigned ShAmt = Log2_32(RHSC);
 786           Base = Offset = N.getOperand(0);
 787           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 788                                                             ARM_AM::lsl),
 789                                           SDLoc(N), MVT::i32);
 790           return AM2_SHOP;
 791         }
 792       }
 793     }
 794   }
 795
 796   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 797       // ISD::OR that is equivalent to an ADD.
 798       !CurDAG->isBaseWithConstantOffset(N)) {
 799     Base = N;
 800     if (N.getOpcode() == ISD::FrameIndex) {
 801       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 802       Base = CurDAG->getTargetFrameIndex(
 803           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 804     } else if (N.getOpcode() == ARMISD::Wrapper &&
 805                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
 806                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
 807       Base = N.getOperand(0);
 808     }
 809     Offset = CurDAG->getRegister(0, MVT::i32);
 810     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 811                                                       ARM_AM::no_shift),
 812                                     SDLoc(N), MVT::i32);
 813     return AM2_BASE;
 814   }
 815
 816   // Match simple R +/- imm12 operands.
 817   if (N.getOpcode() != ISD::SUB) {
 818     int RHSC;
 819     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 820                                 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
 821       Base = N.getOperand(0);
 822       if (Base.getOpcode() == ISD::FrameIndex) {
 823         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 824         Base = CurDAG->getTargetFrameIndex(
 825             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
 826       }
 827       Offset = CurDAG->getRegister(0, MVT::i32);
 828
 829       ARM_AM::AddrOpc AddSub = ARM_AM::add;
 830       if (RHSC < 0) {
 831         AddSub = ARM_AM::sub;
 832         RHSC = - RHSC;
 833       }
 834       Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
 835                                                         ARM_AM::no_shift),
 836                                       SDLoc(N), MVT::i32);
 837       return AM2_BASE;
 838     }
 839   }
 840
 841   if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
 842     // Compute R +/- (R << N) and reuse it.
 843     Base = N;
 844     Offset = CurDAG->getRegister(0, MVT::i32);
 845     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 846                                                       ARM_AM::no_shift),
 847                                     SDLoc(N), MVT::i32);
 848     return AM2_BASE;
 849   }
 850
 851   // Otherwise this is R +/- [possibly shifted] R.
 852   ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
 853   ARM_AM::ShiftOpc ShOpcVal =
 854     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 855   unsigned ShAmt = 0;
 856
 857   Base   = N.getOperand(0);
 858   Offset = N.getOperand(1);
 859
 860   if (ShOpcVal != ARM_AM::no_shift) {
 861     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 862     // it.
 863     if (ConstantSDNode *Sh =
 864            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 865       ShAmt = Sh->getZExtValue();
 866       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 867         Offset = N.getOperand(1).getOperand(0);
 868       else {
 869         ShAmt = 0;
 870         ShOpcVal = ARM_AM::no_shift;
 871       }
 872     } else {
 873       ShOpcVal = ARM_AM::no_shift;
 874     }
 875   }
 876
 877   // Try matching (R shl C) + (R).
 878   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 879       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
 880         N.getOperand(0).hasOneUse())) {
 881     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 882     if (ShOpcVal != ARM_AM::no_shift) {
 883       // Check to see if the RHS of the shift is a constant, if not, we can't
 884       // fold it.
 885       if (ConstantSDNode *Sh =
 886           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 887         ShAmt = Sh->getZExtValue();
 888         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
 889           Offset = N.getOperand(0).getOperand(0);
 890           Base = N.getOperand(1);
 891         } else {
 892           ShAmt = 0;
 893           ShOpcVal = ARM_AM::no_shift;
 894         }
 895       } else {
 896         ShOpcVal = ARM_AM::no_shift;
 897       }
 898     }
 899   }
 900
 901   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 902                                   SDLoc(N), MVT::i32);
 903   return AM2_SHOP;
 904 }
 905
 906 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 907                                             SDValue &Offset, SDValue &Opc) {
 908   unsigned Opcode = Op->getOpcode();
 909   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 910     ? cast<LoadSDNode>(Op)->getAddressingMode()
 911     : cast<StoreSDNode>(Op)->getAddressingMode();
 912   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 913     ? ARM_AM::add : ARM_AM::sub;
 914   int Val;
 915   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 916     return false;
 917
 918   Offset = N;
 919   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 920   unsigned ShAmt = 0;
 921   if (ShOpcVal != ARM_AM::no_shift) {
 922     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 923     // it.
 924     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 925       ShAmt = Sh->getZExtValue();
 926       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 927         Offset = N.getOperand(0);
 928       else {
 929         ShAmt = 0;
 930         ShOpcVal = ARM_AM::no_shift;
 931       }
 932     } else {
 933       ShOpcVal = ARM_AM::no_shift;
 934     }
 935   }
 936
 937   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 938                                   SDLoc(N), MVT::i32);
 939   return true;
 940 }
 941
 942 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
 943                                             SDValue &Offset, SDValue &Opc) {
 944   unsigned Opcode = Op->getOpcode();
 945   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 946     ? cast<LoadSDNode>(Op)->getAddressingMode()
 947     : cast<StoreSDNode>(Op)->getAddressingMode();
 948   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 949     ? ARM_AM::add : ARM_AM::sub;
 950   int Val;
 951   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 952     if (AddSub == ARM_AM::sub) Val *= -1;
 953     Offset = CurDAG->getRegister(0, MVT::i32);
 954     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
 955     return true;
 956   }
 957
 958   return false;
 959 }
 960
 961
 962 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 963                                             SDValue &Offset, SDValue &Opc) {
 964   unsigned Opcode = Op->getOpcode();
 965   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 966     ? cast<LoadSDNode>(Op)->getAddressingMode()
 967     : cast<StoreSDNode>(Op)->getAddressingMode();
 968   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 969     ? ARM_AM::add : ARM_AM::sub;
 970   int Val;
 971   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 972     Offset = CurDAG->getRegister(0, MVT::i32);
 973     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 974                                                       ARM_AM::no_shift),
 975                                     SDLoc(Op), MVT::i32);
 976     return true;
 977   }
 978
 979   return false;
 980 }
 981
 982 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
 983   Base = N;
 984   return true;
 985 }
 986
 987 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 988                                       SDValue &Base, SDValue &Offset,
 989                                       SDValue &Opc) {
 990   if (N.getOpcode() == ISD::SUB) {
 991     // X - C  is canonicalize to X + -C, no need to handle it here.
 992     Base = N.getOperand(0);
 993     Offset = N.getOperand(1);
 994     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
 995                                     MVT::i32);
 996     return true;
 997   }
 998
 999   if (!CurDAG->isBaseWithConstantOffset(N)) {
1000     Base = N;
1001     if (N.getOpcode() == ISD::FrameIndex) {
1002       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1003       Base = CurDAG->getTargetFrameIndex(
1004           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1005     }
1006     Offset = CurDAG->getRegister(0, MVT::i32);
1007     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1008                                     MVT::i32);
1009     return true;
1010   }
1011
1012   // If the RHS is +/- imm8, fold into addr mode.
1013   int RHSC;
1014   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
1015                               -256 + 1, 256, RHSC)) { // 8 bits.
1016     Base = N.getOperand(0);
1017     if (Base.getOpcode() == ISD::FrameIndex) {
1018       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1019       Base = CurDAG->getTargetFrameIndex(
1020           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1021     }
1022     Offset = CurDAG->getRegister(0, MVT::i32);
1023
1024     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1025     if (RHSC < 0) {
1026       AddSub = ARM_AM::sub;
1027       RHSC = -RHSC;
1028     }
1029     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
1030                                     MVT::i32);
1031     return true;
1032   }
1033
1034   Base = N.getOperand(0);
1035   Offset = N.getOperand(1);
1036   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1037                                   MVT::i32);
1038   return true;
1039 }
1040
1041 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
1042                                             SDValue &Offset, SDValue &Opc) {
1043   unsigned Opcode = Op->getOpcode();
1044   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1045     ? cast<LoadSDNode>(Op)->getAddressingMode()
1046     : cast<StoreSDNode>(Op)->getAddressingMode();
1047   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
1048     ? ARM_AM::add : ARM_AM::sub;
1049   int Val;
1050   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
1051     Offset = CurDAG->getRegister(0, MVT::i32);
1052     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
1053                                     MVT::i32);
1054     return true;
1055   }
1056
1057   Offset = N;
1058   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
1059                                   MVT::i32);
1060   return true;
1061 }
1062
1063 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1064                                       SDValue &Base, SDValue &Offset) {
1065   if (!CurDAG->isBaseWithConstantOffset(N)) {
1066     Base = N;
1067     if (N.getOpcode() == ISD::FrameIndex) {
1068       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1069       Base = CurDAG->getTargetFrameIndex(
1070           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1071     } else if (N.getOpcode() == ARMISD::Wrapper &&
1072                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1073                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1074       Base = N.getOperand(0);
1075     }
1076     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1077                                        SDLoc(N), MVT::i32);
1078     return true;
1079   }
1080
1081   // If the RHS is +/- imm8, fold into addr mode.
1082   int RHSC;
1083   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
1084                               -256 + 1, 256, RHSC)) {
1085     Base = N.getOperand(0);
1086     if (Base.getOpcode() == ISD::FrameIndex) {
1087       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1088       Base = CurDAG->getTargetFrameIndex(
1089           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1090     }
1091
1092     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1093     if (RHSC < 0) {
1094       AddSub = ARM_AM::sub;
1095       RHSC = -RHSC;
1096     }
1097     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1098                                        SDLoc(N), MVT::i32);
1099     return true;
1100   }
1101
1102   Base = N;
1103   Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1104                                      SDLoc(N), MVT::i32);
1105   return true;
1106 }
1107
1108 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1109                                       SDValue &Align) {
1110   Addr = N;
1111
1112   unsigned Alignment = 0;
1113
1114   MemSDNode *MemN = cast<MemSDNode>(Parent);
1115
1116   if (isa<LSBaseSDNode>(MemN) ||
1117       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1118         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1119        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1120     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1121     // The maximum alignment is equal to the memory size being referenced.
1122     unsigned MMOAlign = MemN->getAlignment();
1123     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1124     if (MMOAlign >= MemSize && MemSize > 1)
1125       Alignment = MemSize;
1126   } else {
1127     // All other uses of addrmode6 are for intrinsics.  For now just record
1128     // the raw alignment value; it will be refined later based on the legal
1129     // alignment operands for the intrinsic.
1130     Alignment = MemN->getAlignment();
1131   }
1132
1133   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1134   return true;
1135 }
1136
1137 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1138                                             SDValue &Offset) {
1139   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1140   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1141   if (AM != ISD::POST_INC)
1142     return false;
1143   Offset = N;
1144   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1145     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1146       Offset = CurDAG->getRegister(0, MVT::i32);
1147   }
1148   return true;
1149 }
1150
1151 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1152                                        SDValue &Offset, SDValue &Label) {
1153   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1154     Offset = N.getOperand(0);
1155     SDValue N1 = N.getOperand(1);
1156     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1157                                       SDLoc(N), MVT::i32);
1158     return true;
1159   }
1160
1161   return false;
1162 }
1163
1164
1165 //===----------------------------------------------------------------------===//
1166 //                         Thumb Addressing Modes
1167 //===----------------------------------------------------------------------===//
1168
1169 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1170                                             SDValue &Base, SDValue &Offset){
1171   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1172     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1173     if (!NC || !NC->isNullValue())
1174       return false;
1175
1176     Base = Offset = N;
1177     return true;
1178   }
1179
1180   Base = N.getOperand(0);
1181   Offset = N.getOperand(1);
1182   return true;
1183 }
1184
1185 bool
1186 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1187                                           SDValue &Base, SDValue &OffImm) {
1188   if (!CurDAG->isBaseWithConstantOffset(N)) {
1189     if (N.getOpcode() == ISD::ADD) {
1190       return false; // We want to select register offset instead
1191     } else if (N.getOpcode() == ARMISD::Wrapper &&
1192         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1193         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1194       Base = N.getOperand(0);
1195     } else {
1196       Base = N;
1197     }
1198
1199     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1200     return true;
1201   }
1202
1203   // If the RHS is + imm5 * scale, fold into addr mode.
1204   int RHSC;
1205   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1206     Base = N.getOperand(0);
1207     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1208     return true;
1209   }
1210
1211   // Offset is too large, so use register offset instead.
1212   return false;
1213 }
1214
1215 bool
1216 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1217                                            SDValue &OffImm) {
1218   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1219 }
1220
1221 bool
1222 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1223                                            SDValue &OffImm) {
1224   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1225 }
1226
1227 bool
1228 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1229                                            SDValue &OffImm) {
1230   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1231 }
1232
1233 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1234                                             SDValue &Base, SDValue &OffImm) {
1235   if (N.getOpcode() == ISD::FrameIndex) {
1236     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1237     // Only multiples of 4 are allowed for the offset, so the frame object
1238     // alignment must be at least 4.
1239     MachineFrameInfo *MFI = MF->getFrameInfo();
1240     if (MFI->getObjectAlignment(FI) < 4)
1241       MFI->setObjectAlignment(FI, 4);
1242     Base = CurDAG->getTargetFrameIndex(
1243         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1244     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1245     return true;
1246   }
1247
1248   if (!CurDAG->isBaseWithConstantOffset(N))
1249     return false;
1250
1251   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1252   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1253       (LHSR && LHSR->getReg() == ARM::SP)) {
1254     // If the RHS is + imm8 * scale, fold into addr mode.
1255     int RHSC;
1256     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1257       Base = N.getOperand(0);
1258       if (Base.getOpcode() == ISD::FrameIndex) {
1259         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1260         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1261         // indexed by the LHS must be 4-byte aligned.
1262         MachineFrameInfo *MFI = MF->getFrameInfo();
1263         if (MFI->getObjectAlignment(FI) < 4)
1264           MFI->setObjectAlignment(FI, 4);
1265         Base = CurDAG->getTargetFrameIndex(
1266             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1267       }
1268       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1269       return true;
1270     }
1271   }
1272
1273   return false;
1274 }
1275
1276
1277 //===----------------------------------------------------------------------===//
1278 //                        Thumb 2 Addressing Modes
1279 //===----------------------------------------------------------------------===//
1280
1281
1282 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1283                                             SDValue &Base, SDValue &OffImm) {
1284   // Match simple R + imm12 operands.
1285
1286   // Base only.
1287   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1288       !CurDAG->isBaseWithConstantOffset(N)) {
1289     if (N.getOpcode() == ISD::FrameIndex) {
1290       // Match frame index.
1291       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1292       Base = CurDAG->getTargetFrameIndex(
1293           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1294       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1295       return true;
1296     }
1297
1298     if (N.getOpcode() == ARMISD::Wrapper &&
1299         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1300         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1301       Base = N.getOperand(0);
1302       if (Base.getOpcode() == ISD::TargetConstantPool)
1303         return false;  // We want to select t2LDRpci instead.
1304     } else
1305       Base = N;
1306     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1307     return true;
1308   }
1309
1310   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1311     if (SelectT2AddrModeImm8(N, Base, OffImm))
1312       // Let t2LDRi8 handle (R - imm8).
1313       return false;
1314
1315     int RHSC = (int)RHS->getZExtValue();
1316     if (N.getOpcode() == ISD::SUB)
1317       RHSC = -RHSC;
1318
1319     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1320       Base   = N.getOperand(0);
1321       if (Base.getOpcode() == ISD::FrameIndex) {
1322         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1323         Base = CurDAG->getTargetFrameIndex(
1324             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1325       }
1326       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1327       return true;
1328     }
1329   }
1330
1331   // Base only.
1332   Base = N;
1333   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1334   return true;
1335 }
1336
1337 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1338                                            SDValue &Base, SDValue &OffImm) {
1339   // Match simple R - imm8 operands.
1340   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1341       !CurDAG->isBaseWithConstantOffset(N))
1342     return false;
1343
1344   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1345     int RHSC = (int)RHS->getSExtValue();
1346     if (N.getOpcode() == ISD::SUB)
1347       RHSC = -RHSC;
1348
1349     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1350       Base = N.getOperand(0);
1351       if (Base.getOpcode() == ISD::FrameIndex) {
1352         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1353         Base = CurDAG->getTargetFrameIndex(
1354             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1355       }
1356       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1357       return true;
1358     }
1359   }
1360
1361   return false;
1362 }
1363
1364 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1365                                                  SDValue &OffImm){
1366   unsigned Opcode = Op->getOpcode();
1367   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1368     ? cast<LoadSDNode>(Op)->getAddressingMode()
1369     : cast<StoreSDNode>(Op)->getAddressingMode();
1370   int RHSC;
1371   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1372     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1373       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1374       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1375     return true;
1376   }
1377
1378   return false;
1379 }
1380
1381 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1382                                             SDValue &Base,
1383                                             SDValue &OffReg, SDValue &ShImm) {
1384   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1385   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1386     return false;
1387
1388   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1389   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1390     int RHSC = (int)RHS->getZExtValue();
1391     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1392       return false;
1393     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1394       return false;
1395   }
1396
1397   // Look for (R + R) or (R + (R << [1,2,3])).
1398   unsigned ShAmt = 0;
1399   Base   = N.getOperand(0);
1400   OffReg = N.getOperand(1);
1401
1402   // Swap if it is ((R << c) + R).
1403   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1404   if (ShOpcVal != ARM_AM::lsl) {
1405     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1406     if (ShOpcVal == ARM_AM::lsl)
1407       std::swap(Base, OffReg);
1408   }
1409
1410   if (ShOpcVal == ARM_AM::lsl) {
1411     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1412     // it.
1413     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1414       ShAmt = Sh->getZExtValue();
1415       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1416         OffReg = OffReg.getOperand(0);
1417       else {
1418         ShAmt = 0;
1419       }
1420     }
1421   }
1422
1423   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1424   // and use it in a shifted operand do so.
1425   if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1426     unsigned PowerOfTwo = 0;
1427     SDValue NewMulConst;
1428     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1429       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1430       ShAmt = PowerOfTwo;
1431     }
1432   }
1433
1434   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1435
1436   return true;
1437 }
1438
1439 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1440                                                 SDValue &OffImm) {
1441   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1442   // instructions.
1443   Base = N;
1444   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1445
1446   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1447     return true;
1448
1449   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1450   if (!RHS)
1451     return true;
1452
1453   uint32_t RHSC = (int)RHS->getZExtValue();
1454   if (RHSC > 1020 || RHSC % 4 != 0)
1455     return true;
1456
1457   Base = N.getOperand(0);
1458   if (Base.getOpcode() == ISD::FrameIndex) {
1459     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1460     Base = CurDAG->getTargetFrameIndex(
1461         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1462   }
1463
1464   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1465   return true;
1466 }
1467
1468 //===--------------------------------------------------------------------===//
1469
1470 /// getAL - Returns a ARMCC::AL immediate node.
1471 static inline SDValue getAL(SelectionDAG *CurDAG, SDLoc dl) {
1472   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1473 }
1474
1475 SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
1476   LoadSDNode *LD = cast<LoadSDNode>(N);
1477   ISD::MemIndexedMode AM = LD->getAddressingMode();
1478   if (AM == ISD::UNINDEXED)
1479     return nullptr;
1480
1481   EVT LoadedVT = LD->getMemoryVT();
1482   SDValue Offset, AMOpc;
1483   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1484   unsigned Opcode = 0;
1485   bool Match = false;
1486   if (LoadedVT == MVT::i32 && isPre &&
1487       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1488     Opcode = ARM::LDR_PRE_IMM;
1489     Match = true;
1490   } else if (LoadedVT == MVT::i32 && !isPre &&
1491       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1492     Opcode = ARM::LDR_POST_IMM;
1493     Match = true;
1494   } else if (LoadedVT == MVT::i32 &&
1495       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1496     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1497     Match = true;
1498
1499   } else if (LoadedVT == MVT::i16 &&
1500              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1501     Match = true;
1502     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1503       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1504       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1505   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1506     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1507       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1508         Match = true;
1509         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1510       }
1511     } else {
1512       if (isPre &&
1513           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1514         Match = true;
1515         Opcode = ARM::LDRB_PRE_IMM;
1516       } else if (!isPre &&
1517                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1518         Match = true;
1519         Opcode = ARM::LDRB_POST_IMM;
1520       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1521         Match = true;
1522         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1523       }
1524     }
1525   }
1526
1527   if (Match) {
1528     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1529       SDValue Chain = LD->getChain();
1530       SDValue Base = LD->getBasePtr();
1531       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1532                        CurDAG->getRegister(0, MVT::i32), Chain };
1533       return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1534                                     MVT::i32, MVT::Other, Ops);
1535     } else {
1536       SDValue Chain = LD->getChain();
1537       SDValue Base = LD->getBasePtr();
1538       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1539                        CurDAG->getRegister(0, MVT::i32), Chain };
1540       return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1541                                     MVT::i32, MVT::Other, Ops);
1542     }
1543   }
1544
1545   return nullptr;
1546 }
1547
1548 SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
1549   LoadSDNode *LD = cast<LoadSDNode>(N);
1550   ISD::MemIndexedMode AM = LD->getAddressingMode();
1551   if (AM == ISD::UNINDEXED)
1552     return nullptr;
1553
1554   EVT LoadedVT = LD->getMemoryVT();
1555   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1556   SDValue Offset;
1557   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1558   unsigned Opcode = 0;
1559   bool Match = false;
1560   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1561     switch (LoadedVT.getSimpleVT().SimpleTy) {
1562     case MVT::i32:
1563       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1564       break;
1565     case MVT::i16:
1566       if (isSExtLd)
1567         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1568       else
1569         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1570       break;
1571     case MVT::i8:
1572     case MVT::i1:
1573       if (isSExtLd)
1574         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1575       else
1576         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1577       break;
1578     default:
1579       return nullptr;
1580     }
1581     Match = true;
1582   }
1583
1584   if (Match) {
1585     SDValue Chain = LD->getChain();
1586     SDValue Base = LD->getBasePtr();
1587     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1588                      CurDAG->getRegister(0, MVT::i32), Chain };
1589     return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1590                                   MVT::Other, Ops);
1591   }
1592
1593   return nullptr;
1594 }
1595
1596 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1597 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1598   SDLoc dl(V0.getNode());
1599   SDValue RegClass =
1600     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1601   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1602   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1603   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1604   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1605 }
1606
1607 /// \brief Form a D register from a pair of S registers.
1608 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1609   SDLoc dl(V0.getNode());
1610   SDValue RegClass =
1611     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1612   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1613   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1614   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1615   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1616 }
1617
1618 /// \brief Form a quad register from a pair of D registers.
1619 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1620   SDLoc dl(V0.getNode());
1621   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1622                                                MVT::i32);
1623   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1624   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1625   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1626   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1627 }
1628
1629 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1630 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1631   SDLoc dl(V0.getNode());
1632   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1633                                                MVT::i32);
1634   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1635   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1636   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1637   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1638 }
1639
1640 /// \brief Form 4 consecutive S registers.
1641 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1642                                    SDValue V2, SDValue V3) {
1643   SDLoc dl(V0.getNode());
1644   SDValue RegClass =
1645     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1646   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1647   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1648   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1649   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1650   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1651                                     V2, SubReg2, V3, SubReg3 };
1652   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1653 }
1654
1655 /// \brief Form 4 consecutive D registers.
1656 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1657                                    SDValue V2, SDValue V3) {
1658   SDLoc dl(V0.getNode());
1659   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1660                                                MVT::i32);
1661   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1662   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1663   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1664   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1665   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1666                                     V2, SubReg2, V3, SubReg3 };
1667   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1668 }
1669
1670 /// \brief Form 4 consecutive Q registers.
1671 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1672                                    SDValue V2, SDValue V3) {
1673   SDLoc dl(V0.getNode());
1674   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1675                                                MVT::i32);
1676   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1677   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1678   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1679   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1680   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1681                                     V2, SubReg2, V3, SubReg3 };
1682   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1683 }
1684
1685 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1686 /// of a NEON VLD or VST instruction.  The supported values depend on the
1687 /// number of registers being loaded.
1688 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, SDLoc dl,
1689                                        unsigned NumVecs, bool is64BitVector) {
1690   unsigned NumRegs = NumVecs;
1691   if (!is64BitVector && NumVecs < 3)
1692     NumRegs *= 2;
1693
1694   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1695   if (Alignment >= 32 && NumRegs == 4)
1696     Alignment = 32;
1697   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1698     Alignment = 16;
1699   else if (Alignment >= 8)
1700     Alignment = 8;
1701   else
1702     Alignment = 0;
1703
1704   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1705 }
1706
1707 static bool isVLDfixed(unsigned Opc)
1708 {
1709   switch (Opc) {
1710   default: return false;
1711   case ARM::VLD1d8wb_fixed : return true;
1712   case ARM::VLD1d16wb_fixed : return true;
1713   case ARM::VLD1d64Qwb_fixed : return true;
1714   case ARM::VLD1d32wb_fixed : return true;
1715   case ARM::VLD1d64wb_fixed : return true;
1716   case ARM::VLD1d64TPseudoWB_fixed : return true;
1717   case ARM::VLD1d64QPseudoWB_fixed : return true;
1718   case ARM::VLD1q8wb_fixed : return true;
1719   case ARM::VLD1q16wb_fixed : return true;
1720   case ARM::VLD1q32wb_fixed : return true;
1721   case ARM::VLD1q64wb_fixed : return true;
1722   case ARM::VLD2d8wb_fixed : return true;
1723   case ARM::VLD2d16wb_fixed : return true;
1724   case ARM::VLD2d32wb_fixed : return true;
1725   case ARM::VLD2q8PseudoWB_fixed : return true;
1726   case ARM::VLD2q16PseudoWB_fixed : return true;
1727   case ARM::VLD2q32PseudoWB_fixed : return true;
1728   case ARM::VLD2DUPd8wb_fixed : return true;
1729   case ARM::VLD2DUPd16wb_fixed : return true;
1730   case ARM::VLD2DUPd32wb_fixed : return true;
1731   }
1732 }
1733
1734 static bool isVSTfixed(unsigned Opc)
1735 {
1736   switch (Opc) {
1737   default: return false;
1738   case ARM::VST1d8wb_fixed : return true;
1739   case ARM::VST1d16wb_fixed : return true;
1740   case ARM::VST1d32wb_fixed : return true;
1741   case ARM::VST1d64wb_fixed : return true;
1742   case ARM::VST1q8wb_fixed : return true;
1743   case ARM::VST1q16wb_fixed : return true;
1744   case ARM::VST1q32wb_fixed : return true;
1745   case ARM::VST1q64wb_fixed : return true;
1746   case ARM::VST1d64TPseudoWB_fixed : return true;
1747   case ARM::VST1d64QPseudoWB_fixed : return true;
1748   case ARM::VST2d8wb_fixed : return true;
1749   case ARM::VST2d16wb_fixed : return true;
1750   case ARM::VST2d32wb_fixed : return true;
1751   case ARM::VST2q8PseudoWB_fixed : return true;
1752   case ARM::VST2q16PseudoWB_fixed : return true;
1753   case ARM::VST2q32PseudoWB_fixed : return true;
1754   }
1755 }
1756
1757 // Get the register stride update opcode of a VLD/VST instruction that
1758 // is otherwise equivalent to the given fixed stride updating instruction.
1759 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1760   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1761     && "Incorrect fixed stride updating instruction.");
1762   switch (Opc) {
1763   default: break;
1764   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1765   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1766   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1767   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1768   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1769   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1770   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1771   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1772   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1773   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1774   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1775   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1776
1777   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1778   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1779   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1780   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1781   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1782   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1783   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1784   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1785   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1786   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1787
1788   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1789   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1790   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1791   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1792   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1793   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1794
1795   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1796   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1797   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1798   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1799   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1800   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1801
1802   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1803   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1804   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1805   }
1806   return Opc; // If not one we handle, return it unchanged.
1807 }
1808
1809 SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1810                                    const uint16_t *DOpcodes,
1811                                    const uint16_t *QOpcodes0,
1812                                    const uint16_t *QOpcodes1) {
1813   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1814   SDLoc dl(N);
1815
1816   SDValue MemAddr, Align;
1817   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1818   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1819     return nullptr;
1820
1821   SDValue Chain = N->getOperand(0);
1822   EVT VT = N->getValueType(0);
1823   bool is64BitVector = VT.is64BitVector();
1824   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1825
1826   unsigned OpcodeIndex;
1827   switch (VT.getSimpleVT().SimpleTy) {
1828   default: llvm_unreachable("unhandled vld type");
1829     // Double-register operations:
1830   case MVT::v8i8:  OpcodeIndex = 0; break;
1831   case MVT::v4i16: OpcodeIndex = 1; break;
1832   case MVT::v2f32:
1833   case MVT::v2i32: OpcodeIndex = 2; break;
1834   case MVT::v1i64: OpcodeIndex = 3; break;
1835     // Quad-register operations:
1836   case MVT::v16i8: OpcodeIndex = 0; break;
1837   case MVT::v8i16: OpcodeIndex = 1; break;
1838   case MVT::v4f32:
1839   case MVT::v4i32: OpcodeIndex = 2; break;
1840   case MVT::v2f64:
1841   case MVT::v2i64: OpcodeIndex = 3;
1842     assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1843     break;
1844   }
1845
1846   EVT ResTy;
1847   if (NumVecs == 1)
1848     ResTy = VT;
1849   else {
1850     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1851     if (!is64BitVector)
1852       ResTyElts *= 2;
1853     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1854   }
1855   std::vector<EVT> ResTys;
1856   ResTys.push_back(ResTy);
1857   if (isUpdating)
1858     ResTys.push_back(MVT::i32);
1859   ResTys.push_back(MVT::Other);
1860
1861   SDValue Pred = getAL(CurDAG, dl);
1862   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1863   SDNode *VLd;
1864   SmallVector<SDValue, 7> Ops;
1865
1866   // Double registers and VLD1/VLD2 quad registers are directly supported.
1867   if (is64BitVector || NumVecs <= 2) {
1868     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1869                     QOpcodes0[OpcodeIndex]);
1870     Ops.push_back(MemAddr);
1871     Ops.push_back(Align);
1872     if (isUpdating) {
1873       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1874       // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1875       // case entirely when the rest are updated to that form, too.
1876       if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
1877         Opc = getVLDSTRegisterUpdateOpcode(Opc);
1878       // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1879       // check for that explicitly too. Horribly hacky, but temporary.
1880       if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
1881           !isa<ConstantSDNode>(Inc.getNode()))
1882         Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1883     }
1884     Ops.push_back(Pred);
1885     Ops.push_back(Reg0);
1886     Ops.push_back(Chain);
1887     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1888
1889   } else {
1890     // Otherwise, quad registers are loaded with two separate instructions,
1891     // where one loads the even registers and the other loads the odd registers.
1892     EVT AddrTy = MemAddr.getValueType();
1893
1894     // Load the even subregs.  This is always an updating load, so that it
1895     // provides the address to the second load for the odd subregs.
1896     SDValue ImplDef =
1897       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1898     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1899     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1900                                           ResTy, AddrTy, MVT::Other, OpsA);
1901     Chain = SDValue(VLdA, 2);
1902
1903     // Load the odd subregs.
1904     Ops.push_back(SDValue(VLdA, 1));
1905     Ops.push_back(Align);
1906     if (isUpdating) {
1907       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1908       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1909              "only constant post-increment update allowed for VLD3/4");
1910       (void)Inc;
1911       Ops.push_back(Reg0);
1912     }
1913     Ops.push_back(SDValue(VLdA, 0));
1914     Ops.push_back(Pred);
1915     Ops.push_back(Reg0);
1916     Ops.push_back(Chain);
1917     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1918   }
1919
1920   // Transfer memoperands.
1921   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1922   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1923   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1924
1925   if (NumVecs == 1)
1926     return VLd;
1927
1928   // Extract out the subregisters.
1929   SDValue SuperReg = SDValue(VLd, 0);
1930   assert(ARM::dsub_7 == ARM::dsub_0+7 &&
1931          ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
1932   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1933   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1934     ReplaceUses(SDValue(N, Vec),
1935                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1936   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1937   if (isUpdating)
1938     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1939   return nullptr;
1940 }
1941
1942 SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1943                                    const uint16_t *DOpcodes,
1944                                    const uint16_t *QOpcodes0,
1945                                    const uint16_t *QOpcodes1) {
1946   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1947   SDLoc dl(N);
1948
1949   SDValue MemAddr, Align;
1950   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1951   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1952   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1953     return nullptr;
1954
1955   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1956   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1957
1958   SDValue Chain = N->getOperand(0);
1959   EVT VT = N->getOperand(Vec0Idx).getValueType();
1960   bool is64BitVector = VT.is64BitVector();
1961   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1962
1963   unsigned OpcodeIndex;
1964   switch (VT.getSimpleVT().SimpleTy) {
1965   default: llvm_unreachable("unhandled vst type");
1966     // Double-register operations:
1967   case MVT::v8i8:  OpcodeIndex = 0; break;
1968   case MVT::v4i16: OpcodeIndex = 1; break;
1969   case MVT::v2f32:
1970   case MVT::v2i32: OpcodeIndex = 2; break;
1971   case MVT::v1i64: OpcodeIndex = 3; break;
1972     // Quad-register operations:
1973   case MVT::v16i8: OpcodeIndex = 0; break;
1974   case MVT::v8i16: OpcodeIndex = 1; break;
1975   case MVT::v4f32:
1976   case MVT::v4i32: OpcodeIndex = 2; break;
1977   case MVT::v2f64:
1978   case MVT::v2i64: OpcodeIndex = 3;
1979     assert(NumVecs == 1 && "v2i64 type only supported for VST1");
1980     break;
1981   }
1982
1983   std::vector<EVT> ResTys;
1984   if (isUpdating)
1985     ResTys.push_back(MVT::i32);
1986   ResTys.push_back(MVT::Other);
1987
1988   SDValue Pred = getAL(CurDAG, dl);
1989   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1990   SmallVector<SDValue, 7> Ops;
1991
1992   // Double registers and VST1/VST2 quad registers are directly supported.
1993   if (is64BitVector || NumVecs <= 2) {
1994     SDValue SrcReg;
1995     if (NumVecs == 1) {
1996       SrcReg = N->getOperand(Vec0Idx);
1997     } else if (is64BitVector) {
1998       // Form a REG_SEQUENCE to force register allocation.
1999       SDValue V0 = N->getOperand(Vec0Idx + 0);
2000       SDValue V1 = N->getOperand(Vec0Idx + 1);
2001       if (NumVecs == 2)
2002         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2003       else {
2004         SDValue V2 = N->getOperand(Vec0Idx + 2);
2005         // If it's a vst3, form a quad D-register and leave the last part as
2006         // an undef.
2007         SDValue V3 = (NumVecs == 3)
2008           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2009           : N->getOperand(Vec0Idx + 3);
2010         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2011       }
2012     } else {
2013       // Form a QQ register.
2014       SDValue Q0 = N->getOperand(Vec0Idx);
2015       SDValue Q1 = N->getOperand(Vec0Idx + 1);
2016       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2017     }
2018
2019     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2020                     QOpcodes0[OpcodeIndex]);
2021     Ops.push_back(MemAddr);
2022     Ops.push_back(Align);
2023     if (isUpdating) {
2024       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2025       // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2026       // case entirely when the rest are updated to that form, too.
2027       if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2028         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2029       // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
2030       // check for that explicitly too. Horribly hacky, but temporary.
2031       if  (!isa<ConstantSDNode>(Inc.getNode()))
2032         Ops.push_back(Inc);
2033       else if (NumVecs > 2 && !isVSTfixed(Opc))
2034         Ops.push_back(Reg0);
2035     }
2036     Ops.push_back(SrcReg);
2037     Ops.push_back(Pred);
2038     Ops.push_back(Reg0);
2039     Ops.push_back(Chain);
2040     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2041
2042     // Transfer memoperands.
2043     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2044
2045     return VSt;
2046   }
2047
2048   // Otherwise, quad registers are stored with two separate instructions,
2049   // where one stores the even registers and the other stores the odd registers.
2050
2051   // Form the QQQQ REG_SEQUENCE.
2052   SDValue V0 = N->getOperand(Vec0Idx + 0);
2053   SDValue V1 = N->getOperand(Vec0Idx + 1);
2054   SDValue V2 = N->getOperand(Vec0Idx + 2);
2055   SDValue V3 = (NumVecs == 3)
2056     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2057     : N->getOperand(Vec0Idx + 3);
2058   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2059
2060   // Store the even D registers.  This is always an updating store, so that it
2061   // provides the address to the second store for the odd subregs.
2062   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2063   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2064                                         MemAddr.getValueType(),
2065                                         MVT::Other, OpsA);
2066   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
2067   Chain = SDValue(VStA, 1);
2068
2069   // Store the odd D registers.
2070   Ops.push_back(SDValue(VStA, 0));
2071   Ops.push_back(Align);
2072   if (isUpdating) {
2073     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2074     assert(isa<ConstantSDNode>(Inc.getNode()) &&
2075            "only constant post-increment update allowed for VST3/4");
2076     (void)Inc;
2077     Ops.push_back(Reg0);
2078   }
2079   Ops.push_back(RegSeq);
2080   Ops.push_back(Pred);
2081   Ops.push_back(Reg0);
2082   Ops.push_back(Chain);
2083   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2084                                         Ops);
2085   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
2086   return VStB;
2087 }
2088
2089 SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
2090                                          bool isUpdating, unsigned NumVecs,
2091                                          const uint16_t *DOpcodes,
2092                                          const uint16_t *QOpcodes) {
2093   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2094   SDLoc dl(N);
2095
2096   SDValue MemAddr, Align;
2097   unsigned AddrOpIdx = isUpdating ? 1 : 2;
2098   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2099   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2100     return nullptr;
2101
2102   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2103   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2104
2105   SDValue Chain = N->getOperand(0);
2106   unsigned Lane =
2107     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2108   EVT VT = N->getOperand(Vec0Idx).getValueType();
2109   bool is64BitVector = VT.is64BitVector();
2110
2111   unsigned Alignment = 0;
2112   if (NumVecs != 3) {
2113     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2114     unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2115     if (Alignment > NumBytes)
2116       Alignment = NumBytes;
2117     if (Alignment < 8 && Alignment < NumBytes)
2118       Alignment = 0;
2119     // Alignment must be a power of two; make sure of that.
2120     Alignment = (Alignment & -Alignment);
2121     if (Alignment == 1)
2122       Alignment = 0;
2123   }
2124   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2125
2126   unsigned OpcodeIndex;
2127   switch (VT.getSimpleVT().SimpleTy) {
2128   default: llvm_unreachable("unhandled vld/vst lane type");
2129     // Double-register operations:
2130   case MVT::v8i8:  OpcodeIndex = 0; break;
2131   case MVT::v4i16: OpcodeIndex = 1; break;
2132   case MVT::v2f32:
2133   case MVT::v2i32: OpcodeIndex = 2; break;
2134     // Quad-register operations:
2135   case MVT::v8i16: OpcodeIndex = 0; break;
2136   case MVT::v4f32:
2137   case MVT::v4i32: OpcodeIndex = 1; break;
2138   }
2139
2140   std::vector<EVT> ResTys;
2141   if (IsLoad) {
2142     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2143     if (!is64BitVector)
2144       ResTyElts *= 2;
2145     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2146                                       MVT::i64, ResTyElts));
2147   }
2148   if (isUpdating)
2149     ResTys.push_back(MVT::i32);
2150   ResTys.push_back(MVT::Other);
2151
2152   SDValue Pred = getAL(CurDAG, dl);
2153   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2154
2155   SmallVector<SDValue, 8> Ops;
2156   Ops.push_back(MemAddr);
2157   Ops.push_back(Align);
2158   if (isUpdating) {
2159     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2160     Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
2161   }
2162
2163   SDValue SuperReg;
2164   SDValue V0 = N->getOperand(Vec0Idx + 0);
2165   SDValue V1 = N->getOperand(Vec0Idx + 1);
2166   if (NumVecs == 2) {
2167     if (is64BitVector)
2168       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2169     else
2170       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2171   } else {
2172     SDValue V2 = N->getOperand(Vec0Idx + 2);
2173     SDValue V3 = (NumVecs == 3)
2174       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2175       : N->getOperand(Vec0Idx + 3);
2176     if (is64BitVector)
2177       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2178     else
2179       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2180   }
2181   Ops.push_back(SuperReg);
2182   Ops.push_back(getI32Imm(Lane, dl));
2183   Ops.push_back(Pred);
2184   Ops.push_back(Reg0);
2185   Ops.push_back(Chain);
2186
2187   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2188                                   QOpcodes[OpcodeIndex]);
2189   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2190   cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2191   if (!IsLoad)
2192     return VLdLn;
2193
2194   // Extract the subregisters.
2195   SuperReg = SDValue(VLdLn, 0);
2196   assert(ARM::dsub_7 == ARM::dsub_0+7 &&
2197          ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
2198   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2199   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2200     ReplaceUses(SDValue(N, Vec),
2201                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2202   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2203   if (isUpdating)
2204     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2205   return nullptr;
2206 }
2207
2208 SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
2209                                       unsigned NumVecs,
2210                                       const uint16_t *Opcodes) {
2211   assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2212   SDLoc dl(N);
2213
2214   SDValue MemAddr, Align;
2215   if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2216     return nullptr;
2217
2218   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2219   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2220
2221   SDValue Chain = N->getOperand(0);
2222   EVT VT = N->getValueType(0);
2223
2224   unsigned Alignment = 0;
2225   if (NumVecs != 3) {
2226     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2227     unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2228     if (Alignment > NumBytes)
2229       Alignment = NumBytes;
2230     if (Alignment < 8 && Alignment < NumBytes)
2231       Alignment = 0;
2232     // Alignment must be a power of two; make sure of that.
2233     Alignment = (Alignment & -Alignment);
2234     if (Alignment == 1)
2235       Alignment = 0;
2236   }
2237   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2238
2239   unsigned OpcodeIndex;
2240   switch (VT.getSimpleVT().SimpleTy) {
2241   default: llvm_unreachable("unhandled vld-dup type");
2242   case MVT::v8i8:  OpcodeIndex = 0; break;
2243   case MVT::v4i16: OpcodeIndex = 1; break;
2244   case MVT::v2f32:
2245   case MVT::v2i32: OpcodeIndex = 2; break;
2246   }
2247
2248   SDValue Pred = getAL(CurDAG, dl);
2249   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2250   SDValue SuperReg;
2251   unsigned Opc = Opcodes[OpcodeIndex];
2252   SmallVector<SDValue, 6> Ops;
2253   Ops.push_back(MemAddr);
2254   Ops.push_back(Align);
2255   if (isUpdating) {
2256     // fixed-stride update instructions don't have an explicit writeback
2257     // operand. It's implicit in the opcode itself.
2258     SDValue Inc = N->getOperand(2);
2259     if (!isa<ConstantSDNode>(Inc.getNode()))
2260       Ops.push_back(Inc);
2261     // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2262     else if (NumVecs > 2)
2263       Ops.push_back(Reg0);
2264   }
2265   Ops.push_back(Pred);
2266   Ops.push_back(Reg0);
2267   Ops.push_back(Chain);
2268
2269   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2270   std::vector<EVT> ResTys;
2271   ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2272   if (isUpdating)
2273     ResTys.push_back(MVT::i32);
2274   ResTys.push_back(MVT::Other);
2275   SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2276   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2277   SuperReg = SDValue(VLdDup, 0);
2278
2279   // Extract the subregisters.
2280   assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2281   unsigned SubIdx = ARM::dsub_0;
2282   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2283     ReplaceUses(SDValue(N, Vec),
2284                 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2285   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2286   if (isUpdating)
2287     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2288   return nullptr;
2289 }
2290
2291 SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
2292                                     unsigned Opc) {
2293   assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
2294   SDLoc dl(N);
2295   EVT VT = N->getValueType(0);
2296   unsigned FirstTblReg = IsExt ? 2 : 1;
2297
2298   // Form a REG_SEQUENCE to force register allocation.
2299   SDValue RegSeq;
2300   SDValue V0 = N->getOperand(FirstTblReg + 0);
2301   SDValue V1 = N->getOperand(FirstTblReg + 1);
2302   if (NumVecs == 2)
2303     RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
2304   else {
2305     SDValue V2 = N->getOperand(FirstTblReg + 2);
2306     // If it's a vtbl3, form a quad D-register and leave the last part as
2307     // an undef.
2308     SDValue V3 = (NumVecs == 3)
2309       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2310       : N->getOperand(FirstTblReg + 3);
2311     RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2312   }
2313
2314   SmallVector<SDValue, 6> Ops;
2315   if (IsExt)
2316     Ops.push_back(N->getOperand(1));
2317   Ops.push_back(RegSeq);
2318   Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2319   Ops.push_back(getAL(CurDAG, dl)); // predicate
2320   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2321   return CurDAG->getMachineNode(Opc, dl, VT, Ops);
2322 }
2323
2324 SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
2325                                                      bool isSigned) {
2326   if (!Subtarget->hasV6T2Ops())
2327     return nullptr;
2328
2329   unsigned Opc = isSigned
2330     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2331     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2332   SDLoc dl(N);
2333
2334   // For unsigned extracts, check for a shift right and mask
2335   unsigned And_imm = 0;
2336   if (N->getOpcode() == ISD::AND) {
2337     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2338
2339       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2340       if (And_imm & (And_imm + 1))
2341         return nullptr;
2342
2343       unsigned Srl_imm = 0;
2344       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2345                                 Srl_imm)) {
2346         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2347
2348         // Note: The width operand is encoded as width-1.
2349         unsigned Width = countTrailingOnes(And_imm) - 1;
2350         unsigned LSB = Srl_imm;
2351
2352         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2353
2354         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2355           // It's cheaper to use a right shift to extract the top bits.
2356           if (Subtarget->isThumb()) {
2357             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2358             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2359                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2360                               getAL(CurDAG, dl), Reg0, Reg0 };
2361             return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2362           }
2363
2364           // ARM models shift instructions as MOVsi with shifter operand.
2365           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2366           SDValue ShOpc =
2367             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2368                                       MVT::i32);
2369           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2370                             getAL(CurDAG, dl), Reg0, Reg0 };
2371           return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2372         }
2373
2374         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2375                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2376                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2377                           getAL(CurDAG, dl), Reg0 };
2378         return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2379       }
2380     }
2381     return nullptr;
2382   }
2383
2384   // Otherwise, we're looking for a shift of a shift
2385   unsigned Shl_imm = 0;
2386   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2387     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2388     unsigned Srl_imm = 0;
2389     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2390       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2391       // Note: The width operand is encoded as width-1.
2392       unsigned Width = 32 - Srl_imm - 1;
2393       int LSB = Srl_imm - Shl_imm;
2394       if (LSB < 0)
2395         return nullptr;
2396       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2397       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2398                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2399                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2400                         getAL(CurDAG, dl), Reg0 };
2401       return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2402     }
2403   }
2404
2405   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2406     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2407     unsigned LSB = 0;
2408     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2409         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2410       return nullptr;
2411
2412     if (LSB + Width > 32)
2413       return nullptr;
2414
2415     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2416     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2417                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2418                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2419                       getAL(CurDAG, dl), Reg0 };
2420     return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2421   }
2422
2423   return nullptr;
2424 }
2425
2426 /// Target-specific DAG combining for ISD::XOR.
2427 /// Target-independent combining lowers SELECT_CC nodes of the form
2428 /// select_cc setg[ge] X,  0,  X, -X
2429 /// select_cc setgt    X, -1,  X, -X
2430 /// select_cc setl[te] X,  0, -X,  X
2431 /// select_cc setlt    X,  1, -X,  X
2432 /// which represent Integer ABS into:
2433 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2434 /// ARM instruction selection detects the latter and matches it to
2435 /// ARM::ABS or ARM::t2ABS machine node.
2436 SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
2437   SDValue XORSrc0 = N->getOperand(0);
2438   SDValue XORSrc1 = N->getOperand(1);
2439   EVT VT = N->getValueType(0);
2440
2441   if (Subtarget->isThumb1Only())
2442     return nullptr;
2443
2444   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2445     return nullptr;
2446
2447   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2448   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2449   SDValue SRASrc0 = XORSrc1.getOperand(0);
2450   SDValue SRASrc1 = XORSrc1.getOperand(1);
2451   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2452   EVT XType = SRASrc0.getValueType();
2453   unsigned Size = XType.getSizeInBits() - 1;
2454
2455   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2456       XType.isInteger() && SRAConstant != nullptr &&
2457       Size == SRAConstant->getZExtValue()) {
2458     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2459     return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2460   }
2461
2462   return nullptr;
2463 }
2464
2465 SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2466   // The only time a CONCAT_VECTORS operation can have legal types is when
2467   // two 64-bit vectors are concatenated to a 128-bit vector.
2468   EVT VT = N->getValueType(0);
2469   if (!VT.is128BitVector() || N->getNumOperands() != 2)
2470     llvm_unreachable("unexpected CONCAT_VECTORS");
2471   return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
2472 }
2473
2474 SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
2475   SDLoc dl(N);
2476
2477   if (N->isMachineOpcode()) {
2478     N->setNodeId(-1);
2479     return nullptr;   // Already selected.
2480   }
2481
2482   switch (N->getOpcode()) {
2483   default: break;
2484   case ISD::WRITE_REGISTER: {
2485     SDNode *ResNode = SelectWriteRegister(N);
2486     if (ResNode)
2487       return ResNode;
2488     break;
2489   }
2490   case ISD::READ_REGISTER: {
2491     SDNode *ResNode = SelectReadRegister(N);
2492     if (ResNode)
2493       return ResNode;
2494     break;
2495   }
2496   case ISD::INLINEASM: {
2497     SDNode *ResNode = SelectInlineAsm(N);
2498     if (ResNode)
2499       return ResNode;
2500     break;
2501   }
2502   case ISD::XOR: {
2503     // Select special operations if XOR node forms integer ABS pattern
2504     SDNode *ResNode = SelectABSOp(N);
2505     if (ResNode)
2506       return ResNode;
2507     // Other cases are autogenerated.
2508     break;
2509   }
2510   case ISD::Constant: {
2511     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2512     // If we can't materialize the constant we need to use a literal pool
2513     if (ConstantMaterializationCost(Val) > 2) {
2514       SDValue CPIdx = CurDAG->getTargetConstantPool(
2515           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2516           TLI->getPointerTy(CurDAG->getDataLayout()));
2517
2518       SDNode *ResNode;
2519       if (Subtarget->isThumb()) {
2520         SDValue Pred = getAL(CurDAG, dl);
2521         SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2522         SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2523         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2524                                          Ops);
2525       } else {
2526         SDValue Ops[] = {
2527           CPIdx,
2528           CurDAG->getTargetConstant(0, dl, MVT::i32),
2529           getAL(CurDAG, dl),
2530           CurDAG->getRegister(0, MVT::i32),
2531           CurDAG->getEntryNode()
2532         };
2533         ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2534                                        Ops);
2535       }
2536       ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
2537       return nullptr;
2538     }
2539
2540     // Other cases are autogenerated.
2541     break;
2542   }
2543   case ISD::FrameIndex: {
2544     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2545     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2546     SDValue TFI = CurDAG->getTargetFrameIndex(
2547         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2548     if (Subtarget->isThumb1Only()) {
2549       // Set the alignment of the frame object to 4, to avoid having to generate
2550       // more than one ADD
2551       MachineFrameInfo *MFI = MF->getFrameInfo();
2552       if (MFI->getObjectAlignment(FI) < 4)
2553         MFI->setObjectAlignment(FI, 4);
2554       return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2555                                   CurDAG->getTargetConstant(0, dl, MVT::i32));
2556     } else {
2557       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2558                       ARM::t2ADDri : ARM::ADDri);
2559       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2560                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2561                         CurDAG->getRegister(0, MVT::i32) };
2562       return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2563     }
2564   }
2565   case ISD::SRL:
2566     if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2567       return I;
2568     break;
2569   case ISD::SIGN_EXTEND_INREG:
2570   case ISD::SRA:
2571     if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
2572       return I;
2573     break;
2574   case ISD::MUL:
2575     if (Subtarget->isThumb1Only())
2576       break;
2577     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2578       unsigned RHSV = C->getZExtValue();
2579       if (!RHSV) break;
2580       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2581         unsigned ShImm = Log2_32(RHSV-1);
2582         if (ShImm >= 32)
2583           break;
2584         SDValue V = N->getOperand(0);
2585         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2586         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2587         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2588         if (Subtarget->isThumb()) {
2589           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2590           return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2591         } else {
2592           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2593                             Reg0 };
2594           return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2595         }
2596       }
2597       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2598         unsigned ShImm = Log2_32(RHSV+1);
2599         if (ShImm >= 32)
2600           break;
2601         SDValue V = N->getOperand(0);
2602         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2603         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2604         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2605         if (Subtarget->isThumb()) {
2606           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2607           return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2608         } else {
2609           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2610                             Reg0 };
2611           return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2612         }
2613       }
2614     }
2615     break;
2616   case ISD::AND: {
2617     // Check for unsigned bitfield extract
2618     if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2619       return I;
2620
2621     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2622     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2623     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2624     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2625     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2626     EVT VT = N->getValueType(0);
2627     if (VT != MVT::i32)
2628       break;
2629     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2630       ? ARM::t2MOVTi16
2631       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2632     if (!Opc)
2633       break;
2634     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2635     ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2636     if (!N1C)
2637       break;
2638     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2639       SDValue N2 = N0.getOperand(1);
2640       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2641       if (!N2C)
2642         break;
2643       unsigned N1CVal = N1C->getZExtValue();
2644       unsigned N2CVal = N2C->getZExtValue();
2645       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2646           (N1CVal & 0xffffU) == 0xffffU &&
2647           (N2CVal & 0xffffU) == 0x0U) {
2648         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2649                                                   dl, MVT::i32);
2650         SDValue Ops[] = { N0.getOperand(0), Imm16,
2651                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2652         return CurDAG->getMachineNode(Opc, dl, VT, Ops);
2653       }
2654     }
2655     break;
2656   }
2657   case ARMISD::VMOVRRD:
2658     return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
2659                                   N->getOperand(0), getAL(CurDAG, dl),
2660                                   CurDAG->getRegister(0, MVT::i32));
2661   case ISD::UMUL_LOHI: {
2662     if (Subtarget->isThumb1Only())
2663       break;
2664     if (Subtarget->isThumb()) {
2665       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2666                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2667       return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
2668     } else {
2669       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2670                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2671                         CurDAG->getRegister(0, MVT::i32) };
2672       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2673                                     ARM::UMULL : ARM::UMULLv5,
2674                                     dl, MVT::i32, MVT::i32, Ops);
2675     }
2676   }
2677   case ISD::SMUL_LOHI: {
2678     if (Subtarget->isThumb1Only())
2679       break;
2680     if (Subtarget->isThumb()) {
2681       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2682                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2683       return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
2684     } else {
2685       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2686                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2687                         CurDAG->getRegister(0, MVT::i32) };
2688       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2689                                     ARM::SMULL : ARM::SMULLv5,
2690                                     dl, MVT::i32, MVT::i32, Ops);
2691     }
2692   }
2693   case ARMISD::UMLAL:{
2694     if (Subtarget->isThumb()) {
2695       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2696                         N->getOperand(3), getAL(CurDAG, dl),
2697                         CurDAG->getRegister(0, MVT::i32)};
2698       return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
2699     }else{
2700       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2701                         N->getOperand(3), getAL(CurDAG, dl),
2702                         CurDAG->getRegister(0, MVT::i32),
2703                         CurDAG->getRegister(0, MVT::i32) };
2704       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2705                                       ARM::UMLAL : ARM::UMLALv5,
2706                                       dl, MVT::i32, MVT::i32, Ops);
2707     }
2708   }
2709   case ARMISD::SMLAL:{
2710     if (Subtarget->isThumb()) {
2711       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2712                         N->getOperand(3), getAL(CurDAG, dl),
2713                         CurDAG->getRegister(0, MVT::i32)};
2714       return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
2715     }else{
2716       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2717                         N->getOperand(3), getAL(CurDAG, dl),
2718                         CurDAG->getRegister(0, MVT::i32),
2719                         CurDAG->getRegister(0, MVT::i32) };
2720       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2721                                       ARM::SMLAL : ARM::SMLALv5,
2722                                       dl, MVT::i32, MVT::i32, Ops);
2723     }
2724   }
2725   case ISD::LOAD: {
2726     SDNode *ResNode = nullptr;
2727     if (Subtarget->isThumb() && Subtarget->hasThumb2())
2728       ResNode = SelectT2IndexedLoad(N);
2729     else
2730       ResNode = SelectARMIndexedLoad(N);
2731     if (ResNode)
2732       return ResNode;
2733     // Other cases are autogenerated.
2734     break;
2735   }
2736   case ARMISD::BRCOND: {
2737     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2738     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2739     // Pattern complexity = 6  cost = 1  size = 0
2740
2741     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2742     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2743     // Pattern complexity = 6  cost = 1  size = 0
2744
2745     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2746     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2747     // Pattern complexity = 6  cost = 1  size = 0
2748
2749     unsigned Opc = Subtarget->isThumb() ?
2750       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2751     SDValue Chain = N->getOperand(0);
2752     SDValue N1 = N->getOperand(1);
2753     SDValue N2 = N->getOperand(2);
2754     SDValue N3 = N->getOperand(3);
2755     SDValue InFlag = N->getOperand(4);
2756     assert(N1.getOpcode() == ISD::BasicBlock);
2757     assert(N2.getOpcode() == ISD::Constant);
2758     assert(N3.getOpcode() == ISD::Register);
2759
2760     SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
2761                                cast<ConstantSDNode>(N2)->getZExtValue()), dl,
2762                                MVT::i32);
2763     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
2764     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
2765                                              MVT::Glue, Ops);
2766     Chain = SDValue(ResNode, 0);
2767     if (N->getNumValues() == 2) {
2768       InFlag = SDValue(ResNode, 1);
2769       ReplaceUses(SDValue(N, 1), InFlag);
2770     }
2771     ReplaceUses(SDValue(N, 0),
2772                 SDValue(Chain.getNode(), Chain.getResNo()));
2773     return nullptr;
2774   }
2775   case ARMISD::VZIP: {
2776     unsigned Opc = 0;
2777     EVT VT = N->getValueType(0);
2778     switch (VT.getSimpleVT().SimpleTy) {
2779     default: return nullptr;
2780     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
2781     case MVT::v4i16: Opc = ARM::VZIPd16; break;
2782     case MVT::v2f32:
2783     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2784     case MVT::v2i32: Opc = ARM::VTRNd32; break;
2785     case MVT::v16i8: Opc = ARM::VZIPq8; break;
2786     case MVT::v8i16: Opc = ARM::VZIPq16; break;
2787     case MVT::v4f32:
2788     case MVT::v4i32: Opc = ARM::VZIPq32; break;
2789     }
2790     SDValue Pred = getAL(CurDAG, dl);
2791     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2792     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2793     return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2794   }
2795   case ARMISD::VUZP: {
2796     unsigned Opc = 0;
2797     EVT VT = N->getValueType(0);
2798     switch (VT.getSimpleVT().SimpleTy) {
2799     default: return nullptr;
2800     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
2801     case MVT::v4i16: Opc = ARM::VUZPd16; break;
2802     case MVT::v2f32:
2803     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2804     case MVT::v2i32: Opc = ARM::VTRNd32; break;
2805     case MVT::v16i8: Opc = ARM::VUZPq8; break;
2806     case MVT::v8i16: Opc = ARM::VUZPq16; break;
2807     case MVT::v4f32:
2808     case MVT::v4i32: Opc = ARM::VUZPq32; break;
2809     }
2810     SDValue Pred = getAL(CurDAG, dl);
2811     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2812     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2813     return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2814   }
2815   case ARMISD::VTRN: {
2816     unsigned Opc = 0;
2817     EVT VT = N->getValueType(0);
2818     switch (VT.getSimpleVT().SimpleTy) {
2819     default: return nullptr;
2820     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
2821     case MVT::v4i16: Opc = ARM::VTRNd16; break;
2822     case MVT::v2f32:
2823     case MVT::v2i32: Opc = ARM::VTRNd32; break;
2824     case MVT::v16i8: Opc = ARM::VTRNq8; break;
2825     case MVT::v8i16: Opc = ARM::VTRNq16; break;
2826     case MVT::v4f32:
2827     case MVT::v4i32: Opc = ARM::VTRNq32; break;
2828     }
2829     SDValue Pred = getAL(CurDAG, dl);
2830     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2831     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2832     return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
2833   }
2834   case ARMISD::BUILD_VECTOR: {
2835     EVT VecVT = N->getValueType(0);
2836     EVT EltVT = VecVT.getVectorElementType();
2837     unsigned NumElts = VecVT.getVectorNumElements();
2838     if (EltVT == MVT::f64) {
2839       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
2840       return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2841     }
2842     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
2843     if (NumElts == 2)
2844       return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2845     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
2846     return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
2847                      N->getOperand(2), N->getOperand(3));
2848   }
2849
2850   case ARMISD::VLD2DUP: {
2851     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
2852                                         ARM::VLD2DUPd32 };
2853     return SelectVLDDup(N, false, 2, Opcodes);
2854   }
2855
2856   case ARMISD::VLD3DUP: {
2857     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
2858                                         ARM::VLD3DUPd16Pseudo,
2859                                         ARM::VLD3DUPd32Pseudo };
2860     return SelectVLDDup(N, false, 3, Opcodes);
2861   }
2862
2863   case ARMISD::VLD4DUP: {
2864     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
2865                                         ARM::VLD4DUPd16Pseudo,
2866                                         ARM::VLD4DUPd32Pseudo };
2867     return SelectVLDDup(N, false, 4, Opcodes);
2868   }
2869
2870   case ARMISD::VLD2DUP_UPD: {
2871     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
2872                                         ARM::VLD2DUPd16wb_fixed,
2873                                         ARM::VLD2DUPd32wb_fixed };
2874     return SelectVLDDup(N, true, 2, Opcodes);
2875   }
2876
2877   case ARMISD::VLD3DUP_UPD: {
2878     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
2879                                         ARM::VLD3DUPd16Pseudo_UPD,
2880                                         ARM::VLD3DUPd32Pseudo_UPD };
2881     return SelectVLDDup(N, true, 3, Opcodes);
2882   }
2883
2884   case ARMISD::VLD4DUP_UPD: {
2885     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
2886                                         ARM::VLD4DUPd16Pseudo_UPD,
2887                                         ARM::VLD4DUPd32Pseudo_UPD };
2888     return SelectVLDDup(N, true, 4, Opcodes);
2889   }
2890
2891   case ARMISD::VLD1_UPD: {
2892     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
2893                                          ARM::VLD1d16wb_fixed,
2894                                          ARM::VLD1d32wb_fixed,
2895                                          ARM::VLD1d64wb_fixed };
2896     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
2897                                          ARM::VLD1q16wb_fixed,
2898                                          ARM::VLD1q32wb_fixed,
2899                                          ARM::VLD1q64wb_fixed };
2900     return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
2901   }
2902
2903   case ARMISD::VLD2_UPD: {
2904     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
2905                                          ARM::VLD2d16wb_fixed,
2906                                          ARM::VLD2d32wb_fixed,
2907                                          ARM::VLD1q64wb_fixed};
2908     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
2909                                          ARM::VLD2q16PseudoWB_fixed,
2910                                          ARM::VLD2q32PseudoWB_fixed };
2911     return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
2912   }
2913
2914   case ARMISD::VLD3_UPD: {
2915     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
2916                                          ARM::VLD3d16Pseudo_UPD,
2917                                          ARM::VLD3d32Pseudo_UPD,
2918                                          ARM::VLD1d64TPseudoWB_fixed};
2919     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
2920                                           ARM::VLD3q16Pseudo_UPD,
2921                                           ARM::VLD3q32Pseudo_UPD };
2922     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
2923                                           ARM::VLD3q16oddPseudo_UPD,
2924                                           ARM::VLD3q32oddPseudo_UPD };
2925     return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
2926   }
2927
2928   case ARMISD::VLD4_UPD: {
2929     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
2930                                          ARM::VLD4d16Pseudo_UPD,
2931                                          ARM::VLD4d32Pseudo_UPD,
2932                                          ARM::VLD1d64QPseudoWB_fixed};
2933     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
2934                                           ARM::VLD4q16Pseudo_UPD,
2935                                           ARM::VLD4q32Pseudo_UPD };
2936     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
2937                                           ARM::VLD4q16oddPseudo_UPD,
2938                                           ARM::VLD4q32oddPseudo_UPD };
2939     return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
2940   }
2941
2942   case ARMISD::VLD2LN_UPD: {
2943     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
2944                                          ARM::VLD2LNd16Pseudo_UPD,
2945                                          ARM::VLD2LNd32Pseudo_UPD };
2946     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
2947                                          ARM::VLD2LNq32Pseudo_UPD };
2948     return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
2949   }
2950
2951   case ARMISD::VLD3LN_UPD: {
2952     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
2953                                          ARM::VLD3LNd16Pseudo_UPD,
2954                                          ARM::VLD3LNd32Pseudo_UPD };
2955     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
2956                                          ARM::VLD3LNq32Pseudo_UPD };
2957     return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
2958   }
2959
2960   case ARMISD::VLD4LN_UPD: {
2961     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
2962                                          ARM::VLD4LNd16Pseudo_UPD,
2963                                          ARM::VLD4LNd32Pseudo_UPD };
2964     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
2965                                          ARM::VLD4LNq32Pseudo_UPD };
2966     return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
2967   }
2968
2969   case ARMISD::VST1_UPD: {
2970     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
2971                                          ARM::VST1d16wb_fixed,
2972                                          ARM::VST1d32wb_fixed,
2973                                          ARM::VST1d64wb_fixed };
2974     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
2975                                          ARM::VST1q16wb_fixed,
2976                                          ARM::VST1q32wb_fixed,
2977                                          ARM::VST1q64wb_fixed };
2978     return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
2979   }
2980
2981   case ARMISD::VST2_UPD: {
2982     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
2983                                          ARM::VST2d16wb_fixed,
2984                                          ARM::VST2d32wb_fixed,
2985                                          ARM::VST1q64wb_fixed};
2986     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
2987                                          ARM::VST2q16PseudoWB_fixed,
2988                                          ARM::VST2q32PseudoWB_fixed };
2989     return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
2990   }
2991
2992   case ARMISD::VST3_UPD: {
2993     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
2994                                          ARM::VST3d16Pseudo_UPD,
2995                                          ARM::VST3d32Pseudo_UPD,
2996                                          ARM::VST1d64TPseudoWB_fixed};
2997     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
2998                                           ARM::VST3q16Pseudo_UPD,
2999                                           ARM::VST3q32Pseudo_UPD };
3000     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3001                                           ARM::VST3q16oddPseudo_UPD,
3002                                           ARM::VST3q32oddPseudo_UPD };
3003     return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3004   }
3005
3006   case ARMISD::VST4_UPD: {
3007     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3008                                          ARM::VST4d16Pseudo_UPD,
3009                                          ARM::VST4d32Pseudo_UPD,
3010                                          ARM::VST1d64QPseudoWB_fixed};
3011     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3012                                           ARM::VST4q16Pseudo_UPD,
3013                                           ARM::VST4q32Pseudo_UPD };
3014     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3015                                           ARM::VST4q16oddPseudo_UPD,
3016                                           ARM::VST4q32oddPseudo_UPD };
3017     return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3018   }
3019
3020   case ARMISD::VST2LN_UPD: {
3021     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3022                                          ARM::VST2LNd16Pseudo_UPD,
3023                                          ARM::VST2LNd32Pseudo_UPD };
3024     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3025                                          ARM::VST2LNq32Pseudo_UPD };
3026     return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3027   }
3028
3029   case ARMISD::VST3LN_UPD: {
3030     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3031                                          ARM::VST3LNd16Pseudo_UPD,
3032                                          ARM::VST3LNd32Pseudo_UPD };
3033     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3034                                          ARM::VST3LNq32Pseudo_UPD };
3035     return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3036   }
3037
3038   case ARMISD::VST4LN_UPD: {
3039     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3040                                          ARM::VST4LNd16Pseudo_UPD,
3041                                          ARM::VST4LNd32Pseudo_UPD };
3042     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3043                                          ARM::VST4LNq32Pseudo_UPD };
3044     return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3045   }
3046
3047   case ISD::INTRINSIC_VOID:
3048   case ISD::INTRINSIC_W_CHAIN: {
3049     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3050     switch (IntNo) {
3051     default:
3052       break;
3053
3054     case Intrinsic::arm_ldaexd:
3055     case Intrinsic::arm_ldrexd: {
3056       SDLoc dl(N);
3057       SDValue Chain = N->getOperand(0);
3058       SDValue MemAddr = N->getOperand(2);
3059       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3060
3061       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3062       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3063                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3064
3065       // arm_ldrexd returns a i64 value in {i32, i32}
3066       std::vector<EVT> ResTys;
3067       if (isThumb) {
3068         ResTys.push_back(MVT::i32);
3069         ResTys.push_back(MVT::i32);
3070       } else
3071         ResTys.push_back(MVT::Untyped);
3072       ResTys.push_back(MVT::Other);
3073
3074       // Place arguments in the right order.
3075       SmallVector<SDValue, 7> Ops;
3076       Ops.push_back(MemAddr);
3077       Ops.push_back(getAL(CurDAG, dl));
3078       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3079       Ops.push_back(Chain);
3080       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3081       // Transfer memoperands.
3082       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3083       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3084       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3085
3086       // Remap uses.
3087       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3088       if (!SDValue(N, 0).use_empty()) {
3089         SDValue Result;
3090         if (isThumb)
3091           Result = SDValue(Ld, 0);
3092         else {
3093           SDValue SubRegIdx =
3094             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3095           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3096               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3097           Result = SDValue(ResNode,0);
3098         }
3099         ReplaceUses(SDValue(N, 0), Result);
3100       }
3101       if (!SDValue(N, 1).use_empty()) {
3102         SDValue Result;
3103         if (isThumb)
3104           Result = SDValue(Ld, 1);
3105         else {
3106           SDValue SubRegIdx =
3107             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3108           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3109               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3110           Result = SDValue(ResNode,0);
3111         }
3112         ReplaceUses(SDValue(N, 1), Result);
3113       }
3114       ReplaceUses(SDValue(N, 2), OutChain);
3115       return nullptr;
3116     }
3117     case Intrinsic::arm_stlexd:
3118     case Intrinsic::arm_strexd: {
3119       SDLoc dl(N);
3120       SDValue Chain = N->getOperand(0);
3121       SDValue Val0 = N->getOperand(2);
3122       SDValue Val1 = N->getOperand(3);
3123       SDValue MemAddr = N->getOperand(4);
3124
3125       // Store exclusive double return a i32 value which is the return status
3126       // of the issued store.
3127       const EVT ResTys[] = {MVT::i32, MVT::Other};
3128
3129       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3130       // Place arguments in the right order.
3131       SmallVector<SDValue, 7> Ops;
3132       if (isThumb) {
3133         Ops.push_back(Val0);
3134         Ops.push_back(Val1);
3135       } else
3136         // arm_strexd uses GPRPair.
3137         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3138       Ops.push_back(MemAddr);
3139       Ops.push_back(getAL(CurDAG, dl));
3140       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3141       Ops.push_back(Chain);
3142
3143       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3144       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3145                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3146
3147       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3148       // Transfer memoperands.
3149       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3150       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3151       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3152
3153       return St;
3154     }
3155
3156     case Intrinsic::arm_neon_vld1: {
3157       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3158                                            ARM::VLD1d32, ARM::VLD1d64 };
3159       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3160                                            ARM::VLD1q32, ARM::VLD1q64};
3161       return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3162     }
3163
3164     case Intrinsic::arm_neon_vld2: {
3165       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3166                                            ARM::VLD2d32, ARM::VLD1q64 };
3167       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3168                                            ARM::VLD2q32Pseudo };
3169       return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3170     }
3171
3172     case Intrinsic::arm_neon_vld3: {
3173       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3174                                            ARM::VLD3d16Pseudo,
3175                                            ARM::VLD3d32Pseudo,
3176                                            ARM::VLD1d64TPseudo };
3177       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3178                                             ARM::VLD3q16Pseudo_UPD,
3179                                             ARM::VLD3q32Pseudo_UPD };
3180       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3181                                             ARM::VLD3q16oddPseudo,
3182                                             ARM::VLD3q32oddPseudo };
3183       return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3184     }
3185
3186     case Intrinsic::arm_neon_vld4: {
3187       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3188                                            ARM::VLD4d16Pseudo,
3189                                            ARM::VLD4d32Pseudo,
3190                                            ARM::VLD1d64QPseudo };
3191       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3192                                             ARM::VLD4q16Pseudo_UPD,
3193                                             ARM::VLD4q32Pseudo_UPD };
3194       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3195                                             ARM::VLD4q16oddPseudo,
3196                                             ARM::VLD4q32oddPseudo };
3197       return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3198     }
3199
3200     case Intrinsic::arm_neon_vld2lane: {
3201       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3202                                            ARM::VLD2LNd16Pseudo,
3203                                            ARM::VLD2LNd32Pseudo };
3204       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3205                                            ARM::VLD2LNq32Pseudo };
3206       return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3207     }
3208
3209     case Intrinsic::arm_neon_vld3lane: {
3210       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3211                                            ARM::VLD3LNd16Pseudo,
3212                                            ARM::VLD3LNd32Pseudo };
3213       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3214                                            ARM::VLD3LNq32Pseudo };
3215       return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3216     }
3217
3218     case Intrinsic::arm_neon_vld4lane: {
3219       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3220                                            ARM::VLD4LNd16Pseudo,
3221                                            ARM::VLD4LNd32Pseudo };
3222       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3223                                            ARM::VLD4LNq32Pseudo };
3224       return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3225     }
3226
3227     case Intrinsic::arm_neon_vst1: {
3228       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3229                                            ARM::VST1d32, ARM::VST1d64 };
3230       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3231                                            ARM::VST1q32, ARM::VST1q64 };
3232       return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3233     }
3234
3235     case Intrinsic::arm_neon_vst2: {
3236       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3237                                            ARM::VST2d32, ARM::VST1q64 };
3238       static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3239                                      ARM::VST2q32Pseudo };
3240       return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3241     }
3242
3243     case Intrinsic::arm_neon_vst3: {
3244       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3245                                            ARM::VST3d16Pseudo,
3246                                            ARM::VST3d32Pseudo,
3247                                            ARM::VST1d64TPseudo };
3248       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3249                                             ARM::VST3q16Pseudo_UPD,
3250                                             ARM::VST3q32Pseudo_UPD };
3251       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3252                                             ARM::VST3q16oddPseudo,
3253                                             ARM::VST3q32oddPseudo };
3254       return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3255     }
3256
3257     case Intrinsic::arm_neon_vst4: {
3258       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3259                                            ARM::VST4d16Pseudo,
3260                                            ARM::VST4d32Pseudo,
3261                                            ARM::VST1d64QPseudo };
3262       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3263                                             ARM::VST4q16Pseudo_UPD,
3264                                             ARM::VST4q32Pseudo_UPD };
3265       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3266                                             ARM::VST4q16oddPseudo,
3267                                             ARM::VST4q32oddPseudo };
3268       return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3269     }
3270
3271     case Intrinsic::arm_neon_vst2lane: {
3272       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3273                                            ARM::VST2LNd16Pseudo,
3274                                            ARM::VST2LNd32Pseudo };
3275       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3276                                            ARM::VST2LNq32Pseudo };
3277       return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3278     }
3279
3280     case Intrinsic::arm_neon_vst3lane: {
3281       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3282                                            ARM::VST3LNd16Pseudo,
3283                                            ARM::VST3LNd32Pseudo };
3284       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3285                                            ARM::VST3LNq32Pseudo };
3286       return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3287     }
3288
3289     case Intrinsic::arm_neon_vst4lane: {
3290       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3291                                            ARM::VST4LNd16Pseudo,
3292                                            ARM::VST4LNd32Pseudo };
3293       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3294                                            ARM::VST4LNq32Pseudo };
3295       return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3296     }
3297     }
3298     break;
3299   }
3300
3301   case ISD::INTRINSIC_WO_CHAIN: {
3302     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3303     switch (IntNo) {
3304     default:
3305       break;
3306
3307     case Intrinsic::arm_neon_vtbl2:
3308       return SelectVTBL(N, false, 2, ARM::VTBL2);
3309     case Intrinsic::arm_neon_vtbl3:
3310       return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3311     case Intrinsic::arm_neon_vtbl4:
3312       return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3313
3314     case Intrinsic::arm_neon_vtbx2:
3315       return SelectVTBL(N, true, 2, ARM::VTBX2);
3316     case Intrinsic::arm_neon_vtbx3:
3317       return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3318     case Intrinsic::arm_neon_vtbx4:
3319       return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3320     }
3321     break;
3322   }
3323
3324   case ARMISD::VTBL1: {
3325     SDLoc dl(N);
3326     EVT VT = N->getValueType(0);
3327     SmallVector<SDValue, 6> Ops;
3328
3329     Ops.push_back(N->getOperand(0));
3330     Ops.push_back(N->getOperand(1));
3331     Ops.push_back(getAL(CurDAG, dl));                // Predicate
3332     Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3333     return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
3334   }
3335   case ARMISD::VTBL2: {
3336     SDLoc dl(N);
3337     EVT VT = N->getValueType(0);
3338
3339     // Form a REG_SEQUENCE to force register allocation.
3340     SDValue V0 = N->getOperand(0);
3341     SDValue V1 = N->getOperand(1);
3342     SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
3343
3344     SmallVector<SDValue, 6> Ops;
3345     Ops.push_back(RegSeq);
3346     Ops.push_back(N->getOperand(2));
3347     Ops.push_back(getAL(CurDAG, dl));                // Predicate
3348     Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3349     return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
3350   }
3351
3352   case ISD::CONCAT_VECTORS:
3353     return SelectConcatVector(N);
3354   }
3355
3356   return SelectCode(N);
3357 }
3358
3359 // Inspect a register string of the form
3360 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3361 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3362 // and obtain the integer operands from them, adding these operands to the
3363 // provided vector.
3364 static void getIntOperandsFromRegisterString(StringRef RegString,
3365                                              SelectionDAG *CurDAG, SDLoc DL,
3366                                              std::vector<SDValue>& Ops) {
3367   SmallVector<StringRef, 5> Fields;
3368   RegString.split(Fields, ':');
3369
3370   if (Fields.size() > 1) {
3371     bool AllIntFields = true;
3372
3373     for (StringRef Field : Fields) {
3374       // Need to trim out leading 'cp' characters and get the integer field.
3375       unsigned IntField;
3376       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3377       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3378     }
3379
3380     assert(AllIntFields &&
3381             "Unexpected non-integer value in special register string.");
3382   }
3383 }
3384
3385 // Maps a Banked Register string to its mask value. The mask value returned is
3386 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3387 // mask operand, which expresses which register is to be used, e.g. r8, and in
3388 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3389 // was invalid.
3390 static inline int getBankedRegisterMask(StringRef RegString) {
3391   return StringSwitch<int>(RegString.lower())
3392           .Case("r8_usr", 0x00)
3393           .Case("r9_usr", 0x01)
3394           .Case("r10_usr", 0x02)
3395           .Case("r11_usr", 0x03)
3396           .Case("r12_usr", 0x04)
3397           .Case("sp_usr", 0x05)
3398           .Case("lr_usr", 0x06)
3399           .Case("r8_fiq", 0x08)
3400           .Case("r9_fiq", 0x09)
3401           .Case("r10_fiq", 0x0a)
3402           .Case("r11_fiq", 0x0b)
3403           .Case("r12_fiq", 0x0c)
3404           .Case("sp_fiq", 0x0d)
3405           .Case("lr_fiq", 0x0e)
3406           .Case("lr_irq", 0x10)
3407           .Case("sp_irq", 0x11)
3408           .Case("lr_svc", 0x12)
3409           .Case("sp_svc", 0x13)
3410           .Case("lr_abt", 0x14)
3411           .Case("sp_abt", 0x15)
3412           .Case("lr_und", 0x16)
3413           .Case("sp_und", 0x17)
3414           .Case("lr_mon", 0x1c)
3415           .Case("sp_mon", 0x1d)
3416           .Case("elr_hyp", 0x1e)
3417           .Case("sp_hyp", 0x1f)
3418           .Case("spsr_fiq", 0x2e)
3419           .Case("spsr_irq", 0x30)
3420           .Case("spsr_svc", 0x32)
3421           .Case("spsr_abt", 0x34)
3422           .Case("spsr_und", 0x36)
3423           .Case("spsr_mon", 0x3c)
3424           .Case("spsr_hyp", 0x3e)
3425           .Default(-1);
3426 }
3427
3428 // Maps a MClass special register string to its value for use in the
3429 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3430 // Returns -1 to signify that the string was invalid.
3431 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3432   return StringSwitch<int>(RegString.lower())
3433           .Case("apsr", 0x0)
3434           .Case("iapsr", 0x1)
3435           .Case("eapsr", 0x2)
3436           .Case("xpsr", 0x3)
3437           .Case("ipsr", 0x5)
3438           .Case("epsr", 0x6)
3439           .Case("iepsr", 0x7)
3440           .Case("msp", 0x8)
3441           .Case("psp", 0x9)
3442           .Case("primask", 0x10)
3443           .Case("basepri", 0x11)
3444           .Case("basepri_max", 0x12)
3445           .Case("faultmask", 0x13)
3446           .Case("control", 0x14)
3447           .Default(-1);
3448 }
3449
3450 // The flags here are common to those allowed for apsr in the A class cores and
3451 // those allowed for the special registers in the M class cores. Returns a
3452 // value representing which flags were present, -1 if invalid.
3453 static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
3454   if (Flags.empty())
3455     return 0x2 | (int)hasDSP;
3456
3457   return StringSwitch<int>(Flags)
3458           .Case("g", 0x1)
3459           .Case("nzcvq", 0x2)
3460           .Case("nzcvqg", 0x3)
3461           .Default(-1);
3462 }
3463
3464 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
3465                                  const ARMSubtarget *Subtarget) {
3466   // Ensure that the register (without flags) was a valid M Class special
3467   // register.
3468   int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
3469   if (SYSmvalue == -1)
3470     return -1;
3471
3472   // basepri, basepri_max and faultmask are only valid for V7m.
3473   if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
3474     return -1;
3475
3476   // If it was a read then we won't be expecting flags and so at this point
3477   // we can return the mask.
3478   if (IsRead) {
3479     assert (Flags.empty() && "Unexpected flags for reading M class register.");
3480     return SYSmvalue;
3481   }
3482
3483   // We know we are now handling a write so need to get the mask for the flags.
3484   int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
3485
3486   // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
3487   // shouldn't have flags present.
3488   if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
3489     return -1;
3490
3491   // The _g and _nzcvqg versions are only valid if the DSP extension is
3492   // available.
3493   if (!Subtarget->hasDSP() && (Mask & 0x1))
3494     return -1;
3495
3496   // The register was valid so need to put the mask in the correct place
3497   // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
3498   // construct the operand for the instruction node.
3499   if (SYSmvalue < 0x4)
3500     return SYSmvalue | Mask << 10;
3501
3502   return SYSmvalue;
3503 }
3504
3505 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3506   // The mask operand contains the special register (R Bit) in bit 4, whether
3507   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3508   // bits 3-0 contains the fields to be accessed in the special register, set by
3509   // the flags provided with the register.
3510   int Mask = 0;
3511   if (Reg == "apsr") {
3512     // The flags permitted for apsr are the same flags that are allowed in
3513     // M class registers. We get the flag value and then shift the flags into
3514     // the correct place to combine with the mask.
3515     Mask = getMClassFlagsMask(Flags, true);
3516     if (Mask == -1)
3517       return -1;
3518     return Mask << 2;
3519   }
3520
3521   if (Reg != "cpsr" && Reg != "spsr") {
3522     return -1;
3523   }
3524
3525   // This is the same as if the flags were "fc"
3526   if (Flags.empty() || Flags == "all")
3527     return Mask | 0x9;
3528
3529   // Inspect the supplied flags string and set the bits in the mask for
3530   // the relevant and valid flags allowed for cpsr and spsr.
3531   for (char Flag : Flags) {
3532     int FlagVal;
3533     switch (Flag) {
3534       case 'c':
3535         FlagVal = 0x1;
3536         break;
3537       case 'x':
3538         FlagVal = 0x2;
3539         break;
3540       case 's':
3541         FlagVal = 0x4;
3542         break;
3543       case 'f':
3544         FlagVal = 0x8;
3545         break;
3546       default:
3547         FlagVal = 0;
3548     }
3549
3550     // This avoids allowing strings where the same flag bit appears twice.
3551     if (!FlagVal || (Mask & FlagVal))
3552       return -1;
3553     Mask |= FlagVal;
3554   }
3555
3556   // If the register is spsr then we need to set the R bit.
3557   if (Reg == "spsr")
3558     Mask |= 0x10;
3559
3560   return Mask;
3561 }
3562
3563 // Lower the read_register intrinsic to ARM specific DAG nodes
3564 // using the supplied metadata string to select the instruction node to use
3565 // and the registers/masks to construct as operands for the node.
3566 SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){
3567   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3568   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3569   bool IsThumb2 = Subtarget->isThumb2();
3570   SDLoc DL(N);
3571
3572   std::vector<SDValue> Ops;
3573   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3574
3575   if (!Ops.empty()) {
3576     // If the special register string was constructed of fields (as defined
3577     // in the ACLE) then need to lower to MRC node (32 bit) or
3578     // MRRC node(64 bit), we can make the distinction based on the number of
3579     // operands we have.
3580     unsigned Opcode;
3581     SmallVector<EVT, 3> ResTypes;
3582     if (Ops.size() == 5){
3583       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
3584       ResTypes.append({ MVT::i32, MVT::Other });
3585     } else {
3586       assert(Ops.size() == 3 &&
3587               "Invalid number of fields in special register string.");
3588       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
3589       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
3590     }
3591
3592     Ops.push_back(getAL(CurDAG, DL));
3593     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3594     Ops.push_back(N->getOperand(0));
3595     return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops);
3596   }
3597
3598   std::string SpecialReg = RegString->getString().lower();
3599
3600   int BankedReg = getBankedRegisterMask(SpecialReg);
3601   if (BankedReg != -1) {
3602     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
3603             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3604             N->getOperand(0) };
3605     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
3606                                   DL, MVT::i32, MVT::Other, Ops);
3607   }
3608
3609   // The VFP registers are read by creating SelectionDAG nodes with opcodes
3610   // corresponding to the register that is being read from. So we switch on the
3611   // string to find which opcode we need to use.
3612   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
3613                     .Case("fpscr", ARM::VMRS)
3614                     .Case("fpexc", ARM::VMRS_FPEXC)
3615                     .Case("fpsid", ARM::VMRS_FPSID)
3616                     .Case("mvfr0", ARM::VMRS_MVFR0)
3617                     .Case("mvfr1", ARM::VMRS_MVFR1)
3618                     .Case("mvfr2", ARM::VMRS_MVFR2)
3619                     .Case("fpinst", ARM::VMRS_FPINST)
3620                     .Case("fpinst2", ARM::VMRS_FPINST2)
3621                     .Default(0);
3622
3623   // If an opcode was found then we can lower the read to a VFP instruction.
3624   if (Opcode) {
3625     if (!Subtarget->hasVFP2())
3626       return nullptr;
3627     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
3628       return nullptr;
3629
3630     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3631             N->getOperand(0) };
3632     return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops);
3633   }
3634
3635   // If the target is M Class then need to validate that the register string
3636   // is an acceptable value, so check that a mask can be constructed from the
3637   // string.
3638   if (Subtarget->isMClass()) {
3639     int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget);
3640     if (SYSmValue == -1)
3641       return nullptr;
3642
3643     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
3644                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3645                       N->getOperand(0) };
3646     return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops);
3647   }
3648
3649   // Here we know the target is not M Class so we need to check if it is one
3650   // of the remaining possible values which are apsr, cpsr or spsr.
3651   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
3652     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3653             N->getOperand(0) };
3654     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL,
3655                                   MVT::i32, MVT::Other, Ops);
3656   }
3657
3658   if (SpecialReg == "spsr") {
3659     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3660             N->getOperand(0) };
3661     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
3662                                   DL, MVT::i32, MVT::Other, Ops);
3663   }
3664
3665   return nullptr;
3666 }
3667
3668 // Lower the write_register intrinsic to ARM specific DAG nodes
3669 // using the supplied metadata string to select the instruction node to use
3670 // and the registers/masks to use in the nodes
3671 SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
3672   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3673   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3674   bool IsThumb2 = Subtarget->isThumb2();
3675   SDLoc DL(N);
3676
3677   std::vector<SDValue> Ops;
3678   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3679
3680   if (!Ops.empty()) {
3681     // If the special register string was constructed of fields (as defined
3682     // in the ACLE) then need to lower to MCR node (32 bit) or
3683     // MCRR node(64 bit), we can make the distinction based on the number of
3684     // operands we have.
3685     unsigned Opcode;
3686     if (Ops.size() == 5) {
3687       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
3688       Ops.insert(Ops.begin()+2, N->getOperand(2));
3689     } else {
3690       assert(Ops.size() == 3 &&
3691               "Invalid number of fields in special register string.");
3692       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
3693       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
3694       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
3695     }
3696
3697     Ops.push_back(getAL(CurDAG, DL));
3698     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3699     Ops.push_back(N->getOperand(0));
3700
3701     return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
3702   }
3703
3704   std::string SpecialReg = RegString->getString().lower();
3705   int BankedReg = getBankedRegisterMask(SpecialReg);
3706   if (BankedReg != -1) {
3707     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
3708             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3709             N->getOperand(0) };
3710     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
3711                                   DL, MVT::Other, Ops);
3712   }
3713
3714   // The VFP registers are written to by creating SelectionDAG nodes with
3715   // opcodes corresponding to the register that is being written. So we switch
3716   // on the string to find which opcode we need to use.
3717   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
3718                     .Case("fpscr", ARM::VMSR)
3719                     .Case("fpexc", ARM::VMSR_FPEXC)
3720                     .Case("fpsid", ARM::VMSR_FPSID)
3721                     .Case("fpinst", ARM::VMSR_FPINST)
3722                     .Case("fpinst2", ARM::VMSR_FPINST2)
3723                     .Default(0);
3724
3725   if (Opcode) {
3726     if (!Subtarget->hasVFP2())
3727       return nullptr;
3728     Ops = { N->getOperand(2), getAL(CurDAG, DL),
3729             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
3730     return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
3731   }
3732
3733   SmallVector<StringRef, 5> Fields;
3734   StringRef(SpecialReg).split(Fields, '_', 1, false);
3735   std::string Reg = Fields[0].str();
3736   StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
3737
3738   // If the target was M Class then need to validate the special register value
3739   // and retrieve the mask for use in the instruction node.
3740   if (Subtarget->isMClass()) {
3741     // basepri_max gets split so need to correct Reg and Flags.
3742     if (SpecialReg == "basepri_max") {
3743       Reg = SpecialReg;
3744       Flags = "";
3745     }
3746     int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
3747     if (SYSmValue == -1)
3748       return nullptr;
3749
3750     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
3751                       N->getOperand(2), getAL(CurDAG, DL),
3752                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
3753     return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops);
3754   }
3755
3756   // We then check to see if a valid mask can be constructed for one of the
3757   // register string values permitted for the A and R class cores. These values
3758   // are apsr, spsr and cpsr; these are also valid on older cores.
3759   int Mask = getARClassRegisterMask(Reg, Flags);
3760   if (Mask != -1) {
3761     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
3762             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3763             N->getOperand(0) };
3764     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
3765                                   DL, MVT::Other, Ops);
3766   }
3767
3768   return nullptr;
3769 }
3770
3771 SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
3772   std::vector<SDValue> AsmNodeOperands;
3773   unsigned Flag, Kind;
3774   bool Changed = false;
3775   unsigned NumOps = N->getNumOperands();
3776
3777   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
3778   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
3779   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
3780   // respectively. Since there is no constraint to explicitly specify a
3781   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
3782   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
3783   // them into a GPRPair.
3784
3785   SDLoc dl(N);
3786   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
3787                                    : SDValue(nullptr,0);
3788
3789   SmallVector<bool, 8> OpChanged;
3790   // Glue node will be appended late.
3791   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
3792     SDValue op = N->getOperand(i);
3793     AsmNodeOperands.push_back(op);
3794
3795     if (i < InlineAsm::Op_FirstOperand)
3796       continue;
3797
3798     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
3799       Flag = C->getZExtValue();
3800       Kind = InlineAsm::getKind(Flag);
3801     }
3802     else
3803       continue;
3804
3805     // Immediate operands to inline asm in the SelectionDAG are modeled with
3806     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
3807     // the second is a constant with the value of the immediate. If we get here
3808     // and we have a Kind_Imm, skip the next operand, and continue.
3809     if (Kind == InlineAsm::Kind_Imm) {
3810       SDValue op = N->getOperand(++i);
3811       AsmNodeOperands.push_back(op);
3812       continue;
3813     }
3814
3815     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
3816     if (NumRegs)
3817       OpChanged.push_back(false);
3818
3819     unsigned DefIdx = 0;
3820     bool IsTiedToChangedOp = false;
3821     // If it's a use that is tied with a previous def, it has no
3822     // reg class constraint.
3823     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
3824       IsTiedToChangedOp = OpChanged[DefIdx];
3825
3826     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
3827         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
3828       continue;
3829
3830     unsigned RC;
3831     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
3832     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
3833         || NumRegs != 2)
3834       continue;
3835
3836     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
3837     SDValue V0 = N->getOperand(i+1);
3838     SDValue V1 = N->getOperand(i+2);
3839     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
3840     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
3841     SDValue PairedReg;
3842     MachineRegisterInfo &MRI = MF->getRegInfo();
3843
3844     if (Kind == InlineAsm::Kind_RegDef ||
3845         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
3846       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
3847       // the original GPRs.
3848
3849       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
3850       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
3851       SDValue Chain = SDValue(N,0);
3852
3853       SDNode *GU = N->getGluedUser();
3854       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
3855                                                Chain.getValue(1));
3856
3857       // Extract values from a GPRPair reg and copy to the original GPR reg.
3858       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
3859                                                     RegCopy);
3860       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
3861                                                     RegCopy);
3862       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
3863                                         RegCopy.getValue(1));
3864       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
3865
3866       // Update the original glue user.
3867       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
3868       Ops.push_back(T1.getValue(1));
3869       CurDAG->UpdateNodeOperands(GU, Ops);
3870     }
3871     else {
3872       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
3873       // GPRPair and then pass the GPRPair to the inline asm.
3874       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
3875
3876       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
3877       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
3878                                           Chain.getValue(1));
3879       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
3880                                           T0.getValue(1));
3881       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
3882
3883       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
3884       // i32 VRs of inline asm with it.
3885       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
3886       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
3887       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
3888
3889       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
3890       Glue = Chain.getValue(1);
3891     }
3892
3893     Changed = true;
3894
3895     if(PairedReg.getNode()) {
3896       OpChanged[OpChanged.size() -1 ] = true;
3897       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
3898       if (IsTiedToChangedOp)
3899         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
3900       else
3901         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
3902       // Replace the current flag.
3903       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
3904           Flag, dl, MVT::i32);
3905       // Add the new register node and skip the original two GPRs.
3906       AsmNodeOperands.push_back(PairedReg);
3907       // Skip the next two GPRs.
3908       i += 2;
3909     }
3910   }
3911
3912   if (Glue.getNode())
3913     AsmNodeOperands.push_back(Glue);
3914   if (!Changed)
3915     return nullptr;
3916
3917   SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
3918       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
3919   New->setNodeId(-1);
3920   return New.getNode();
3921 }
3922
3923
3924 bool ARMDAGToDAGISel::
3925 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
3926                              std::vector<SDValue> &OutOps) {
3927   switch(ConstraintID) {
3928   default:
3929     llvm_unreachable("Unexpected asm memory constraint");
3930   case InlineAsm::Constraint_i:
3931     // FIXME: It seems strange that 'i' is needed here since it's supposed to
3932     //        be an immediate and not a memory constraint.
3933     // Fallthrough.
3934   case InlineAsm::Constraint_m:
3935   case InlineAsm::Constraint_o:
3936   case InlineAsm::Constraint_Q:
3937   case InlineAsm::Constraint_Um:
3938   case InlineAsm::Constraint_Un:
3939   case InlineAsm::Constraint_Uq:
3940   case InlineAsm::Constraint_Us:
3941   case InlineAsm::Constraint_Ut:
3942   case InlineAsm::Constraint_Uv:
3943   case InlineAsm::Constraint_Uy:
3944     // Require the address to be in a register.  That is safe for all ARM
3945     // variants and it is hard to do anything much smarter without knowing
3946     // how the operand is used.
3947     OutOps.push_back(Op);
3948     return false;
3949   }
3950   return true;
3951 }
3952
3953 /// createARMISelDag - This pass converts a legalized DAG into a
3954 /// ARM-specific DAG, ready for instruction scheduling.
3955 ///
3956 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
3957                                      CodeGenOpt::Level OptLevel) {
3958   return new ARMDAGToDAGISel(TM, OptLevel);
3959 }