1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines an instruction selector for the ARM target.
12 //===----------------------------------------------------------------------===//
14 #define DEBUG_TYPE "arm-isel"
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMTargetMachine.h"
18 #include "MCTargetDesc/ARMAddressingModes.h"
19 #include "llvm/CallingConv.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/Constants.h"
26 #include "llvm/DerivedTypes.h"
27 #include "llvm/Function.h"
28 #include "llvm/Intrinsics.h"
29 #include "llvm/LLVMContext.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Compiler.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/Target/TargetLowering.h"
36 #include "llvm/Target/TargetOptions.h"
41 DisableShifterOp("disable-shifter-op", cl::Hidden,
42 cl::desc("Disable isel of shifter-op"),
46 CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
47 cl::desc("Check fp vmla / vmls hazard at isel time"),
50 //===--------------------------------------------------------------------===//
51 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
52 /// instructions for SelectionDAG operations.
57 AM2_BASE, // Simple AM2 (+-imm12)
58 AM2_SHOP // Shifter-op AM2
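// Illustrative note (not in the original source): a plain immediate offset
// such as "ldr r0, [r1, #8]" selects the AM2_BASE form, while a scaled
// register offset such as "ldr r0, [r1, r2, lsl #2]" selects AM2_SHOP.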
61 class ARMDAGToDAGISel : public SelectionDAGISel {
62 ARMBaseTargetMachine &TM;
63 const ARMBaseInstrInfo *TII;
65 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
66 /// make the right decision when generating code for different targets.
67 const ARMSubtarget *Subtarget;
70 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
71 CodeGenOpt::Level OptLevel)
72 : SelectionDAGISel(tm, OptLevel), TM(tm),
73 TII(static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo())),
74 Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
77 virtual const char *getPassName() const {
78 return "ARM Instruction Selection";
81 virtual void PreprocessISelDAG();
83 /// getI32Imm - Return a target constant of type i32 with the specified value.
85 inline SDValue getI32Imm(unsigned Imm) {
86 return CurDAG->getTargetConstant(Imm, MVT::i32);
89 SDNode *Select(SDNode *N);
92 bool hasNoVMLxHazardUse(SDNode *N) const;
93 bool isShifterOpProfitable(const SDValue &Shift,
94 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
95 bool SelectRegShifterOperand(SDValue N, SDValue &A,
96 SDValue &B, SDValue &C,
97 bool CheckProfitability = true);
98 bool SelectImmShifterOperand(SDValue N, SDValue &A,
99 SDValue &B, bool CheckProfitability = true);
100 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
101 SDValue &B, SDValue &C) {
102 // Don't apply the profitability check
103 return SelectRegShifterOperand(N, A, B, C, false);
105 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
107 // Don't apply the profitability check
108 return SelectImmShifterOperand(N, A, B, false);
111 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
112 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
114 AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
115 SDValue &Offset, SDValue &Opc);
116 bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
118 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
121 bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
123 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
126 bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
128 SelectAddrMode2Worker(N, Base, Offset, Opc);
129 // return SelectAddrMode2ShOp(N, Base, Offset, Opc);
130 // This always matches one way or another.
134 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
135 SDValue &Offset, SDValue &Opc);
136 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
137 SDValue &Offset, SDValue &Opc);
138 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
139 SDValue &Offset, SDValue &Opc);
140 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
141 bool SelectAddrMode3(SDValue N, SDValue &Base,
142 SDValue &Offset, SDValue &Opc);
143 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
144 SDValue &Offset, SDValue &Opc);
145 bool SelectAddrMode5(SDValue N, SDValue &Base,
147 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
148 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
150 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
152 // Thumb Addressing Modes:
153 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
154 bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
156 bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
157 bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
158 bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
159 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
161 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
163 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
165 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
167 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
169 // Thumb 2 Addressing Modes:
170 bool SelectT2ShifterOperandReg(SDValue N,
171 SDValue &BaseReg, SDValue &Opc);
172 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
173 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
175 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
177 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
178 SDValue &OffReg, SDValue &ShImm);
180 inline bool is_so_imm(unsigned Imm) const {
181 return ARM_AM::getSOImmVal(Imm) != -1;
184 inline bool is_so_imm_not(unsigned Imm) const {
185 return ARM_AM::getSOImmVal(~Imm) != -1;
188 inline bool is_t2_so_imm(unsigned Imm) const {
189 return ARM_AM::getT2SOImmVal(Imm) != -1;
192 inline bool is_t2_so_imm_not(unsigned Imm) const {
193 return ARM_AM::getT2SOImmVal(~Imm) != -1;
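// Example added for clarity (not in the original source): 0xFF000000 is a
// valid ARM so_imm (0xFF rotated right by 8), while 0x101 is not, since it
// cannot be expressed as an 8-bit value rotated by an even amount. The t2
// variants use the Thumb-2 modified-immediate encoding instead.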
196 // Include the pieces autogenerated from the target description.
197 #include "ARMGenDAGISel.inc"
200 /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for ARM.
202 SDNode *SelectARMIndexedLoad(SDNode *N);
203 SDNode *SelectT2IndexedLoad(SDNode *N);
205 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
206 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
207 /// loads of D registers and even subregs and odd subregs of Q registers.
208 /// For NumVecs <= 2, QOpcodes1 is not used.
209 SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
210 const uint16_t *DOpcodes,
211 const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
213 /// SelectVST - Select NEON store intrinsics. NumVecs should
214 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
215 /// stores of D registers and even subregs and odd subregs of Q registers.
216 /// For NumVecs <= 2, QOpcodes1 is not used.
217 SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
218 const uint16_t *DOpcodes,
219 const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
221 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
222 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
223 /// load/store of D registers and Q registers.
224 SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
225 bool isUpdating, unsigned NumVecs,
226 const uint16_t *DOpcodes, const uint16_t *QOpcodes);
228 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
229 /// should be 2, 3 or 4. The opcode array specifies the instructions used
230 /// for loading D registers. (Q registers are not supported.)
231 SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
232 const uint16_t *Opcodes);
234 /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
235 /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
236 /// generated to force the table registers to be consecutive.
237 SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
239 /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
240 SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
242 /// SelectCMOVOp - Select CMOV instructions for ARM.
243 SDNode *SelectCMOVOp(SDNode *N);
244 SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
245 ARMCC::CondCodes CCVal, SDValue CCR,
247 SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
248 ARMCC::CondCodes CCVal, SDValue CCR,
250 SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
251 ARMCC::CondCodes CCVal, SDValue CCR,
253 SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
254 ARMCC::CondCodes CCVal, SDValue CCR,
257 // Select special operations if the node forms an integer ABS pattern.
258 SDNode *SelectABSOp(SDNode *N);
260 SDNode *SelectConcatVector(SDNode *N);
262 SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
264 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
265 /// inline asm expressions.
266 virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
268 std::vector<SDValue> &OutOps);
270 // Form pairs of consecutive R, S, D, or Q registers.
271 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
272 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
273 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
274 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
276 // Form sequences of 4 consecutive S, D, or Q registers.
277 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
278 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
279 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
281 // Get the alignment operand for a NEON VLD or VST instruction.
282 SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
286 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
287 /// operand. If so, Imm will receive the 32-bit value.
288 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
289 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
290 Imm = cast<ConstantSDNode>(N)->getZExtValue();
296 // isInt32Immediate - This method tests to see if the operand is a 32-bit constant.
297 // If so, Imm will receive the 32-bit value.
298 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
299 return isInt32Immediate(N.getNode(), Imm);
302 // isOpcWithIntImmediate - This method tests to see if the node is a specific
303 // opcode and that it has an immediate integer right operand.
304 // If so, Imm will receive the 32-bit value.
305 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
306 return N->getOpcode() == Opc &&
307 isInt32Immediate(N->getOperand(1).getNode(), Imm);
310 /// \brief Check whether a particular node is a constant value representable as
311 /// (N * Scale) where N is in [\p RangeMin, \p RangeMax).
313 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
314 static bool isScaledConstantInRange(SDValue Node, int Scale,
315 int RangeMin, int RangeMax,
316 int &ScaledConstant) {
317 assert(Scale > 0 && "Invalid scale!");
319 // Check that this is a constant.
320 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
324 ScaledConstant = (int) C->getZExtValue();
325 if ((ScaledConstant % Scale) != 0)
328 ScaledConstant /= Scale;
329 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
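// Worked example (illustrative, not part of the original source): for
// Node = constant 1020, Scale = 4, RangeMin = 0, RangeMax = 256, this helper
// computes ScaledConstant = 1020 / 4 = 255, which lies in [0, 256), so it
// returns true; a constant of 1022 would fail the (1022 % 4) == 0 check.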
332 void ARMDAGToDAGISel::PreprocessISelDAG() {
333 if (!Subtarget->hasV6T2Ops())
336 bool isThumb2 = Subtarget->isThumb();
337 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
338 E = CurDAG->allnodes_end(); I != E; ) {
339 SDNode *N = I++; // Advance the iterator before examining N to avoid invalidation issues.
341 if (N->getOpcode() != ISD::ADD)
344 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
345 // leading zeros, followed by consecutive set bits, followed by 1 or 2
346 // trailing zeros, e.g. 1020.
347 // Transform the expression to
348 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
349 // of trailing zeros of c2. The left shift would be folded as a shifter
350 // operand of 'add', and the 'and' and 'srl' would become a bit-extraction node (UBFX).
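// Concrete illustration (added here for clarity, not part of the original
// source): with c2 = 1020 (binary ...1111111100), tz = 2 and c2 >> tz = 255,
// so
//   (add X1, (and (srl X2, c1), 1020))
// becomes
//   (add X1, (shl (and (srl X2, c1), 255), 2))
// where the shl folds into the add and the and/srl pair becomes a UBFX.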
353 SDValue N0 = N->getOperand(0);
354 SDValue N1 = N->getOperand(1);
355 unsigned And_imm = 0;
356 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
357 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
363 // Check if the AND mask is an immediate of the form: 000.....1111111100
364 unsigned TZ = CountTrailingZeros_32(And_imm);
365 if (TZ != 1 && TZ != 2)
366 // Be conservative here. Shifter operands aren't always free. e.g. On
367 // Swift, a left shifter operand of 1 or 2 is free but others are not.
369 // ubfx r3, r1, #16, #8
370 // ldr.w r3, [r0, r3, lsl #2]
373 // and.w r2, r9, r1, lsr #14
377 if (And_imm & (And_imm + 1))
380 // Look for (and (srl X, c1), c2).
381 SDValue Srl = N1.getOperand(0);
382 unsigned Srl_imm = 0;
383 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
387 // Make sure the first operand is not a shifter operand, which would prevent
388 // folding of the left shift.
393 if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
396 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
397 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
401 // Now make the transformation.
402 Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32,
404 CurDAG->getConstant(Srl_imm+TZ, MVT::i32));
405 N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32,
406 Srl, CurDAG->getConstant(And_imm, MVT::i32));
407 N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32,
408 N1, CurDAG->getConstant(TZ, MVT::i32));
409 CurDAG->UpdateNodeOperands(N, N0, N1);
413 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
414 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
415 /// least on current ARM implementations) which should be avoided.
416 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
417 if (OptLevel == CodeGenOpt::None)
420 if (!CheckVMLxHazard)
423 if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9() &&
424 !Subtarget->isSwift())
430 SDNode *Use = *N->use_begin();
431 if (Use->getOpcode() == ISD::CopyToReg)
433 if (Use->isMachineOpcode()) {
434 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
437 unsigned Opcode = MCID.getOpcode();
438 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
440 // vmlx feeding into another vmlx. We actually want to unfold
441 // the use later in the MLxExpansion pass. e.g.
443 // vmla followed by a dependent vmla (stall 8 cycles):
448 // this adds up to about 18 - 19 cycles.
451 // Unfolded into a vmul (stall 4 cycles) followed by a vadd,
452 // the sequence adds up to about 14 cycles.
453 return TII->isFpMLxInstruction(Opcode);
459 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
460 ARM_AM::ShiftOpc ShOpcVal,
462 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
464 if (Shift.hasOneUse())
467 return ShOpcVal == ARM_AM::lsl &&
468 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
471 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
474 bool CheckProfitability) {
475 if (DisableShifterOp)
478 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
480 // Don't match base register only case. That is matched to a separate
481 // lower complexity pattern with explicit register operand.
482 if (ShOpcVal == ARM_AM::no_shift) return false;
484 BaseReg = N.getOperand(0);
485 unsigned ShImmVal = 0;
486 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
487 if (!RHS) return false;
488 ShImmVal = RHS->getZExtValue() & 31;
489 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
494 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
498 bool CheckProfitability) {
499 if (DisableShifterOp)
502 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
504 // Don't match base register only case. That is matched to a separate
505 // lower complexity pattern with explicit register operand.
506 if (ShOpcVal == ARM_AM::no_shift) return false;
508 BaseReg = N.getOperand(0);
509 unsigned ShImmVal = 0;
510 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
511 if (RHS) return false;
513 ShReg = N.getOperand(1);
514 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
516 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
522 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
525 // Match simple R + imm12 operands.
528 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
529 !CurDAG->isBaseWithConstantOffset(N)) {
530 if (N.getOpcode() == ISD::FrameIndex) {
531 // Match frame index.
532 int FI = cast<FrameIndexSDNode>(N)->getIndex();
533 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
534 OffImm = CurDAG->getTargetConstant(0, MVT::i32);
538 if (N.getOpcode() == ARMISD::Wrapper &&
539 !(Subtarget->useMovt() &&
540 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
541 Base = N.getOperand(0);
544 OffImm = CurDAG->getTargetConstant(0, MVT::i32);
548 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
549 int RHSC = (int)RHS->getZExtValue();
550 if (N.getOpcode() == ISD::SUB)
553 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
554 Base = N.getOperand(0);
555 if (Base.getOpcode() == ISD::FrameIndex) {
556 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
557 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
559 OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
566 OffImm = CurDAG->getTargetConstant(0, MVT::i32);
572 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
574 if (N.getOpcode() == ISD::MUL &&
575 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
576 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
577 // X * [3,5,9] -> X + X * [2,4,8] etc.
578 int RHSC = (int)RHS->getZExtValue();
581 ARM_AM::AddrOpc AddSub = ARM_AM::add;
583 AddSub = ARM_AM::sub;
586 if (isPowerOf2_32(RHSC)) {
587 unsigned ShAmt = Log2_32(RHSC);
588 Base = Offset = N.getOperand(0);
589 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
598 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
599 // ISD::OR that is equivalent to an ISD::ADD.
600 !CurDAG->isBaseWithConstantOffset(N))
603 // Leave simple R +/- imm12 operands for LDRi12
604 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
606 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
607 -0x1000+1, 0x1000, RHSC)) // 12 bits.
611 // Otherwise this is R +/- [possibly shifted] R.
612 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
613 ARM_AM::ShiftOpc ShOpcVal =
614 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
617 Base = N.getOperand(0);
618 Offset = N.getOperand(1);
620 if (ShOpcVal != ARM_AM::no_shift) {
621 // Check to see if the RHS of the shift is a constant; if not, we can't fold it.
623 if (ConstantSDNode *Sh =
624 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
625 ShAmt = Sh->getZExtValue();
626 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
627 Offset = N.getOperand(1).getOperand(0);
630 ShOpcVal = ARM_AM::no_shift;
633 ShOpcVal = ARM_AM::no_shift;
637 // Try matching (R shl C) + (R).
638 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
639 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
640 N.getOperand(0).hasOneUse())) {
641 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
642 if (ShOpcVal != ARM_AM::no_shift) {
643 // Check to see if the RHS of the shift is a constant; if not, we can't fold it.
645 if (ConstantSDNode *Sh =
646 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
647 ShAmt = Sh->getZExtValue();
648 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
649 Offset = N.getOperand(0).getOperand(0);
650 Base = N.getOperand(1);
653 ShOpcVal = ARM_AM::no_shift;
656 ShOpcVal = ARM_AM::no_shift;
661 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
669 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
673 if (N.getOpcode() == ISD::MUL &&
674 (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
675 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
676 // X * [3,5,9] -> X + X * [2,4,8] etc.
677 int RHSC = (int)RHS->getZExtValue();
680 ARM_AM::AddrOpc AddSub = ARM_AM::add;
682 AddSub = ARM_AM::sub;
685 if (isPowerOf2_32(RHSC)) {
686 unsigned ShAmt = Log2_32(RHSC);
687 Base = Offset = N.getOperand(0);
688 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
697 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
698 // ISD::OR that is equivalent to an ADD.
699 !CurDAG->isBaseWithConstantOffset(N)) {
701 if (N.getOpcode() == ISD::FrameIndex) {
702 int FI = cast<FrameIndexSDNode>(N)->getIndex();
703 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
704 } else if (N.getOpcode() == ARMISD::Wrapper &&
705 !(Subtarget->useMovt() &&
706 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
707 Base = N.getOperand(0);
709 Offset = CurDAG->getRegister(0, MVT::i32);
710 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
716 // Match simple R +/- imm12 operands.
717 if (N.getOpcode() != ISD::SUB) {
719 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
720 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
721 Base = N.getOperand(0);
722 if (Base.getOpcode() == ISD::FrameIndex) {
723 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
724 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
726 Offset = CurDAG->getRegister(0, MVT::i32);
728 ARM_AM::AddrOpc AddSub = ARM_AM::add;
730 AddSub = ARM_AM::sub;
733 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
740 if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
741 // Compute R +/- (R << N) and reuse it.
743 Offset = CurDAG->getRegister(0, MVT::i32);
744 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
750 // Otherwise this is R +/- [possibly shifted] R.
751 ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
752 ARM_AM::ShiftOpc ShOpcVal =
753 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
756 Base = N.getOperand(0);
757 Offset = N.getOperand(1);
759 if (ShOpcVal != ARM_AM::no_shift) {
760 // Check to see if the RHS of the shift is a constant; if not, we can't fold it.
762 if (ConstantSDNode *Sh =
763 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
764 ShAmt = Sh->getZExtValue();
765 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
766 Offset = N.getOperand(1).getOperand(0);
769 ShOpcVal = ARM_AM::no_shift;
772 ShOpcVal = ARM_AM::no_shift;
776 // Try matching (R shl C) + (R).
777 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
778 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
779 N.getOperand(0).hasOneUse())) {
780 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
781 if (ShOpcVal != ARM_AM::no_shift) {
782 // Check to see if the RHS of the shift is a constant; if not, we can't fold it.
784 if (ConstantSDNode *Sh =
785 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
786 ShAmt = Sh->getZExtValue();
787 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
788 Offset = N.getOperand(0).getOperand(0);
789 Base = N.getOperand(1);
792 ShOpcVal = ARM_AM::no_shift;
795 ShOpcVal = ARM_AM::no_shift;
800 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
805 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
806 SDValue &Offset, SDValue &Opc) {
807 unsigned Opcode = Op->getOpcode();
808 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
809 ? cast<LoadSDNode>(Op)->getAddressingMode()
810 : cast<StoreSDNode>(Op)->getAddressingMode();
811 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
812 ? ARM_AM::add : ARM_AM::sub;
814 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
818 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
820 if (ShOpcVal != ARM_AM::no_shift) {
821 // Check to see if the RHS of the shift is a constant; if not, we can't fold it.
823 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
824 ShAmt = Sh->getZExtValue();
825 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
826 Offset = N.getOperand(0);
829 ShOpcVal = ARM_AM::no_shift;
832 ShOpcVal = ARM_AM::no_shift;
836 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
841 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
842 SDValue &Offset, SDValue &Opc) {
843 unsigned Opcode = Op->getOpcode();
844 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
845 ? cast<LoadSDNode>(Op)->getAddressingMode()
846 : cast<StoreSDNode>(Op)->getAddressingMode();
847 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
848 ? ARM_AM::add : ARM_AM::sub;
850 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
851 if (AddSub == ARM_AM::sub) Val *= -1;
852 Offset = CurDAG->getRegister(0, MVT::i32);
853 Opc = CurDAG->getTargetConstant(Val, MVT::i32);
861 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
862 SDValue &Offset, SDValue &Opc) {
863 unsigned Opcode = Op->getOpcode();
864 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
865 ? cast<LoadSDNode>(Op)->getAddressingMode()
866 : cast<StoreSDNode>(Op)->getAddressingMode();
867 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
868 ? ARM_AM::add : ARM_AM::sub;
870 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
871 Offset = CurDAG->getRegister(0, MVT::i32);
872 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
881 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
886 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
887 SDValue &Base, SDValue &Offset,
889 if (N.getOpcode() == ISD::SUB) {
890 // X - C is canonicalized to X + -C; no need to handle it here.
891 Base = N.getOperand(0);
892 Offset = N.getOperand(1);
893 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
897 if (!CurDAG->isBaseWithConstantOffset(N)) {
899 if (N.getOpcode() == ISD::FrameIndex) {
900 int FI = cast<FrameIndexSDNode>(N)->getIndex();
901 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
903 Offset = CurDAG->getRegister(0, MVT::i32);
904 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
908 // If the RHS is +/- imm8, fold into addr mode.
910 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
911 -256 + 1, 256, RHSC)) { // 8 bits.
912 Base = N.getOperand(0);
913 if (Base.getOpcode() == ISD::FrameIndex) {
914 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
915 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
917 Offset = CurDAG->getRegister(0, MVT::i32);
919 ARM_AM::AddrOpc AddSub = ARM_AM::add;
921 AddSub = ARM_AM::sub;
924 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
928 Base = N.getOperand(0);
929 Offset = N.getOperand(1);
930 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
934 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
935 SDValue &Offset, SDValue &Opc) {
936 unsigned Opcode = Op->getOpcode();
937 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
938 ? cast<LoadSDNode>(Op)->getAddressingMode()
939 : cast<StoreSDNode>(Op)->getAddressingMode();
940 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
941 ? ARM_AM::add : ARM_AM::sub;
943 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
944 Offset = CurDAG->getRegister(0, MVT::i32);
945 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
950 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
954 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
955 SDValue &Base, SDValue &Offset) {
956 if (!CurDAG->isBaseWithConstantOffset(N)) {
958 if (N.getOpcode() == ISD::FrameIndex) {
959 int FI = cast<FrameIndexSDNode>(N)->getIndex();
960 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
961 } else if (N.getOpcode() == ARMISD::Wrapper &&
962 !(Subtarget->useMovt() &&
963 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
964 Base = N.getOperand(0);
966 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
971 // If the RHS is +/- imm8, fold into addr mode.
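// (Illustrative note, not from the original source: addressing mode 5
// offsets are 8-bit multiples of 4, so the reachable range is +/-1020 bytes,
// e.g. "vldr d0, [r0, #1020]".)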
973 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
974 -256 + 1, 256, RHSC)) {
975 Base = N.getOperand(0);
976 if (Base.getOpcode() == ISD::FrameIndex) {
977 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
978 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
981 ARM_AM::AddrOpc AddSub = ARM_AM::add;
983 AddSub = ARM_AM::sub;
986 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
992 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
997 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1001 unsigned Alignment = 0;
1002 if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
1003 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1004 // The maximum alignment is equal to the memory size being referenced.
1005 unsigned LSNAlign = LSN->getAlignment();
1006 unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
1007 if (LSNAlign >= MemSize && MemSize > 1)
1008 Alignment = MemSize;
1010 // All other uses of addrmode6 are for intrinsics. For now just record
1011 // the raw alignment value; it will be refined later based on the legal
1012 // alignment operands for the intrinsic.
1013 Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
1016 Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
1020 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1022 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1023 ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1024 if (AM != ISD::POST_INC)
1027 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1028 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1029 Offset = CurDAG->getRegister(0, MVT::i32);
1034 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1035 SDValue &Offset, SDValue &Label) {
1036 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1037 Offset = N.getOperand(0);
1038 SDValue N1 = N.getOperand(1);
1039 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1048 //===----------------------------------------------------------------------===//
1049 // Thumb Addressing Modes
1050 //===----------------------------------------------------------------------===//
1052 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1053 SDValue &Base, SDValue &Offset){
1054 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1055 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1056 if (!NC || !NC->isNullValue())
1063 Base = N.getOperand(0);
1064 Offset = N.getOperand(1);
1069 ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
1070 SDValue &Offset, unsigned Scale) {
1072 SDValue TmpBase, TmpOffImm;
1073 if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
1074 return false; // We want to select tLDRspi / tSTRspi instead.
1076 if (N.getOpcode() == ARMISD::Wrapper &&
1077 N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
1078 return false; // We want to select tLDRpci instead.
1081 if (!CurDAG->isBaseWithConstantOffset(N))
1084 // Thumb does not have [sp, r] address mode.
1085 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1086 RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
1087 if ((LHSR && LHSR->getReg() == ARM::SP) ||
1088 (RHSR && RHSR->getReg() == ARM::SP))
1091 // FIXME: Why do we explicitly check for a match here and then return false?
1092 // Presumably to allow something else to match, but shouldn't this be documented?
1095 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
1098 Base = N.getOperand(0);
1099 Offset = N.getOperand(1);
1104 ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
1107 return SelectThumbAddrModeRI(N, Base, Offset, 1);
1111 ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
1114 return SelectThumbAddrModeRI(N, Base, Offset, 2);
1118 ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
1121 return SelectThumbAddrModeRI(N, Base, Offset, 4);
1125 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1126 SDValue &Base, SDValue &OffImm) {
1128 SDValue TmpBase, TmpOffImm;
1129 if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
1130 return false; // We want to select tLDRspi / tSTRspi instead.
1132 if (N.getOpcode() == ARMISD::Wrapper &&
1133 N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
1134 return false; // We want to select tLDRpci instead.
1137 if (!CurDAG->isBaseWithConstantOffset(N)) {
1138 if (N.getOpcode() == ARMISD::Wrapper &&
1139 !(Subtarget->useMovt() &&
1140 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
1141 Base = N.getOperand(0);
1146 OffImm = CurDAG->getTargetConstant(0, MVT::i32);
1150 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1151 RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
1152 if ((LHSR && LHSR->getReg() == ARM::SP) ||
1153 (RHSR && RHSR->getReg() == ARM::SP)) {
1154 ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
1155 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1156 unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
1157 unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
1159 // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
1160 if (LHSC != 0 || RHSC != 0) return false;
1163 OffImm = CurDAG->getTargetConstant(0, MVT::i32);
1167 // If the RHS is + imm5 * scale, fold into addr mode.
1169 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1170 Base = N.getOperand(0);
1171 OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
1175 Base = N.getOperand(0);
1176 OffImm = CurDAG->getTargetConstant(0, MVT::i32);
1181 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1183 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1187 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1189 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1193 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1195 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1198 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1199 SDValue &Base, SDValue &OffImm) {
1200 if (N.getOpcode() == ISD::FrameIndex) {
1201 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1202 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
1203 OffImm = CurDAG->getTargetConstant(0, MVT::i32);
1207 if (!CurDAG->isBaseWithConstantOffset(N))
1210 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1211 if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1212 (LHSR && LHSR->getReg() == ARM::SP)) {
1213 // If the RHS is + imm8 * scale, fold into addr mode.
1215 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1216 Base = N.getOperand(0);
1217 if (Base.getOpcode() == ISD::FrameIndex) {
1218 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1219 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
1221 OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
1230 //===----------------------------------------------------------------------===//
1231 // Thumb 2 Addressing Modes
1232 //===----------------------------------------------------------------------===//
1235 bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
1237 if (DisableShifterOp)
1240 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
1242 // Don't match base register only case. That is matched to a separate
1243 // lower complexity pattern with explicit register operand.
1244 if (ShOpcVal == ARM_AM::no_shift) return false;
1246 BaseReg = N.getOperand(0);
1247 unsigned ShImmVal = 0;
1248 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1249 ShImmVal = RHS->getZExtValue() & 31;
1250 Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
1257 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1258 SDValue &Base, SDValue &OffImm) {
1259 // Match simple R + imm12 operands.
1262 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1263 !CurDAG->isBaseWithConstantOffset(N)) {
1264 if (N.getOpcode() == ISD::FrameIndex) {
1265 // Match frame index.
1266 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1267 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
1268 OffImm = CurDAG->getTargetConstant(0, MVT::i32);
1272 if (N.getOpcode() == ARMISD::Wrapper &&
1273 !(Subtarget->useMovt() &&
1274 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
1275 Base = N.getOperand(0);
1276 if (Base.getOpcode() == ISD::TargetConstantPool)
1277 return false; // We want to select t2LDRpci instead.
1280 OffImm = CurDAG->getTargetConstant(0, MVT::i32);
1284 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1285 if (SelectT2AddrModeImm8(N, Base, OffImm))
1286 // Let t2LDRi8 handle (R - imm8).
1289 int RHSC = (int)RHS->getZExtValue();
1290 if (N.getOpcode() == ISD::SUB)
1293 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1294 Base = N.getOperand(0);
1295 if (Base.getOpcode() == ISD::FrameIndex) {
1296 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1297 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
1299 OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
1306 OffImm = CurDAG->getTargetConstant(0, MVT::i32);
1310 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1311 SDValue &Base, SDValue &OffImm) {
1312 // Match simple R - imm8 operands.
1313 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1314 !CurDAG->isBaseWithConstantOffset(N))
1317 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1318 int RHSC = (int)RHS->getSExtValue();
1319 if (N.getOpcode() == ISD::SUB)
1322 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1323 Base = N.getOperand(0);
1324 if (Base.getOpcode() == ISD::FrameIndex) {
1325 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1326 Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
1328 OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
1336 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1338 unsigned Opcode = Op->getOpcode();
1339 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1340 ? cast<LoadSDNode>(Op)->getAddressingMode()
1341 : cast<StoreSDNode>(Op)->getAddressingMode();
1343 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1344 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1345 ? CurDAG->getTargetConstant(RHSC, MVT::i32)
1346 : CurDAG->getTargetConstant(-RHSC, MVT::i32);
1353 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1355 SDValue &OffReg, SDValue &ShImm) {
1356 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1357 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1360 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1361 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1362 int RHSC = (int)RHS->getZExtValue();
1363 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1365 else if (RHSC < 0 && RHSC >= -255) // 8 bits
1369 // Look for (R + R) or (R + (R << [1,2,3])).
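// (Illustrative note, not in the original: these correspond to Thumb-2 forms
// such as "ldr.w r0, [r1, r2]" and "ldr.w r0, [r1, r2, lsl #2]".)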
1371 Base = N.getOperand(0);
1372 OffReg = N.getOperand(1);
1374 // Swap if it is ((R << c) + R).
1375 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1376 if (ShOpcVal != ARM_AM::lsl) {
1377 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1378 if (ShOpcVal == ARM_AM::lsl)
1379 std::swap(Base, OffReg);
1382 if (ShOpcVal == ARM_AM::lsl) {
1383 // Check to see if the RHS of the shift is a constant; if not, we can't fold it.
1385 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1386 ShAmt = Sh->getZExtValue();
1387 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1388 OffReg = OffReg.getOperand(0);
1391 ShOpcVal = ARM_AM::no_shift;
1394 ShOpcVal = ARM_AM::no_shift;
1398 ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32);
1403 //===--------------------------------------------------------------------===//
1405 /// getAL - Returns an ARMCC::AL immediate node.
1406 static inline SDValue getAL(SelectionDAG *CurDAG) {
1407 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
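// Note added for clarity (not in the original source): the indexed-load
// selectors below match pre-indexed forms such as "ldr r0, [r1, #4]!",
// where the offset is applied before the access and written back to the
// base register, and post-indexed forms such as "ldr r0, [r1], #4", where
// the offset is written back after the access.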
1410 SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
1411 LoadSDNode *LD = cast<LoadSDNode>(N);
1412 ISD::MemIndexedMode AM = LD->getAddressingMode();
1413 if (AM == ISD::UNINDEXED)
1416 EVT LoadedVT = LD->getMemoryVT();
1417 SDValue Offset, AMOpc;
1418 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1419 unsigned Opcode = 0;
1421 if (LoadedVT == MVT::i32 && isPre &&
1422 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1423 Opcode = ARM::LDR_PRE_IMM;
1425 } else if (LoadedVT == MVT::i32 && !isPre &&
1426 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1427 Opcode = ARM::LDR_POST_IMM;
1429 } else if (LoadedVT == MVT::i32 &&
1430 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1431 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1434 } else if (LoadedVT == MVT::i16 &&
1435 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1437 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1438 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1439 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1440 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1441 if (LD->getExtensionType() == ISD::SEXTLOAD) {
1442 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1444 Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1448 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1450 Opcode = ARM::LDRB_PRE_IMM;
1451 } else if (!isPre &&
1452 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1454 Opcode = ARM::LDRB_POST_IMM;
1455 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1457 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1463 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1464 SDValue Chain = LD->getChain();
1465 SDValue Base = LD->getBasePtr();
1466 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
1467 CurDAG->getRegister(0, MVT::i32), Chain };
1468 return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
1469 MVT::i32, MVT::Other, Ops, 5);
1471 SDValue Chain = LD->getChain();
1472 SDValue Base = LD->getBasePtr();
1473 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
1474 CurDAG->getRegister(0, MVT::i32), Chain };
1475 return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
1476 MVT::i32, MVT::Other, Ops, 6);
1483 SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
1484 LoadSDNode *LD = cast<LoadSDNode>(N);
1485 ISD::MemIndexedMode AM = LD->getAddressingMode();
1486 if (AM == ISD::UNINDEXED)
1489 EVT LoadedVT = LD->getMemoryVT();
1490 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1492 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1493 unsigned Opcode = 0;
1495 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1496 switch (LoadedVT.getSimpleVT().SimpleTy) {
1498 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1502 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1504 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1509 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1511 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1520 SDValue Chain = LD->getChain();
1521 SDValue Base = LD->getBasePtr();
1522 SDValue Ops[]= { Base, Offset, getAL(CurDAG),
1523 CurDAG->getRegister(0, MVT::i32), Chain };
1524 return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
1525 MVT::Other, Ops, 5);
1531 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
1532 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1533 DebugLoc dl = V0.getNode()->getDebugLoc();
1535 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32);
1536 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
1537 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
1538 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1539 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
1542 /// \brief Form a D register from a pair of S registers.
1543 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1544 DebugLoc dl = V0.getNode()->getDebugLoc();
1546 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
1547 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
1548 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
1549 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1550 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
1553 /// \brief Form a quad register from a pair of D registers.
1554 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1555 DebugLoc dl = V0.getNode()->getDebugLoc();
1556 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
1557 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
1558 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
1559 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1560 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
1563 /// \brief Form 4 consecutive D registers from a pair of Q registers.
1564 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1565 DebugLoc dl = V0.getNode()->getDebugLoc();
1566 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
1567 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
1568 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
1569 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1570 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
1573 /// \brief Form 4 consecutive S registers.
1574 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1575 SDValue V2, SDValue V3) {
1576 DebugLoc dl = V0.getNode()->getDebugLoc();
1578 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32);
1579 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
1580 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
1581 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
1582 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
1583 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1584 V2, SubReg2, V3, SubReg3 };
1585 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
1588 /// \brief Form 4 consecutive D registers.
1589 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1590 SDValue V2, SDValue V3) {
1591 DebugLoc dl = V0.getNode()->getDebugLoc();
1592 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
1593 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
1594 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
1595 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
1596 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
1597 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1598 V2, SubReg2, V3, SubReg3 };
1599 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
1602 /// \brief Form 4 consecutive Q registers.
1603 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1604 SDValue V2, SDValue V3) {
1605 DebugLoc dl = V0.getNode()->getDebugLoc();
1606 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
1607 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
1608 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
1609 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
1610 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
1611 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1612 V2, SubReg2, V3, SubReg3 };
1613 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
1616 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1617 /// of a NEON VLD or VST instruction. The supported values depend on the
1618 /// number of registers being loaded.
1619 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
1620 bool is64BitVector) {
1621 unsigned NumRegs = NumVecs;
1622 if (!is64BitVector && NumVecs < 3)
1625 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1626 if (Alignment >= 32 && NumRegs == 4)
1628 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1630 else if (Alignment >= 8)
1635 return CurDAG->getTargetConstant(Alignment, MVT::i32);
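// Worked example (illustrative, not part of the original source): for a VLD1
// of one quad register (NumVecs = 1, !is64BitVector), NumRegs becomes 2, so
// an input alignment of 32 bytes is clamped to 16; a VLD4 of D registers
// (NumRegs = 4) keeps a 32-byte alignment.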
1638 // Get the register stride update opcode of a VLD/VST instruction that
1639 // is otherwise equivalent to the given fixed stride updating instruction.
1640 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1643 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1644 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1645 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1646 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1647 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1648 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1649 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1650 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1652 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1653 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1654 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1655 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1656 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1657 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1658 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1659 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1660 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1661 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1663 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1664 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1665 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1666 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1667 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1668 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1670 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1671 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1672 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1673 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1674 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1675 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1677 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1678 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1679 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1681 return Opc; // If not one we handle, return it unchanged.
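// Background note (added, not in the original): the "_fixed" forms model
// writeback by the access size, e.g. "vld1.8 {d0}, [r0]!", while the
// "_register" forms model writeback by a register amount, e.g.
// "vld1.8 {d0}, [r0], r1"; the mapping above switches to the register form
// when the increment operand turns out not to be the fixed constant.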
1684 SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1685 const uint16_t *DOpcodes,
1686 const uint16_t *QOpcodes0,
1687 const uint16_t *QOpcodes1) {
1688 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1689 DebugLoc dl = N->getDebugLoc();
1691 SDValue MemAddr, Align;
1692 unsigned AddrOpIdx = isUpdating ? 1 : 2;
1693 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1696 SDValue Chain = N->getOperand(0);
1697 EVT VT = N->getValueType(0);
1698 bool is64BitVector = VT.is64BitVector();
1699 Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
1701 unsigned OpcodeIndex;
1702 switch (VT.getSimpleVT().SimpleTy) {
1703 default: llvm_unreachable("unhandled vld type");
1704 // Double-register operations:
1705 case MVT::v8i8: OpcodeIndex = 0; break;
1706 case MVT::v4i16: OpcodeIndex = 1; break;
1708 case MVT::v2i32: OpcodeIndex = 2; break;
1709 case MVT::v1i64: OpcodeIndex = 3; break;
1710 // Quad-register operations:
1711 case MVT::v16i8: OpcodeIndex = 0; break;
1712 case MVT::v8i16: OpcodeIndex = 1; break;
1714 case MVT::v4i32: OpcodeIndex = 2; break;
1715 case MVT::v2i64: OpcodeIndex = 3;
1716 assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1724 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1727 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1729 std::vector<EVT> ResTys;
1730 ResTys.push_back(ResTy);
1732 ResTys.push_back(MVT::i32);
1733 ResTys.push_back(MVT::Other);
1735 SDValue Pred = getAL(CurDAG);
1736 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1738 SmallVector<SDValue, 7> Ops;
1740 // Double registers and VLD1/VLD2 quad registers are directly supported.
1741 if (is64BitVector || NumVecs <= 2) {
1742 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1743 QOpcodes0[OpcodeIndex]);
1744 Ops.push_back(MemAddr);
1745 Ops.push_back(Align);
1747 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1748 // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1749 // case entirely when the rest are updated to that form, too.
1750 if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
1751 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1752 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1753 // check for that explicitly too. Horribly hacky, but temporary.
1754 if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) ||
1755 !isa<ConstantSDNode>(Inc.getNode()))
1756 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1758 Ops.push_back(Pred);
1759 Ops.push_back(Reg0);
1760 Ops.push_back(Chain);
1761 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
1764 // Otherwise, quad registers are loaded with two separate instructions,
1765 // where one loads the even registers and the other loads the odd registers.
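// (Illustration, not from the original source: a quad-register vld3 is
// emitted as two instructions with double-spaced register lists, e.g. one
// loading {d0, d2, d4} and a second loading {d1, d3, d5}.)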
1766 EVT AddrTy = MemAddr.getValueType();
1768 // Load the even subregs. This is always an updating load, so that it
1769 // provides the address to the second load for the odd subregs.
1771 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1772 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1773 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1774 ResTy, AddrTy, MVT::Other, OpsA, 7);
1775 Chain = SDValue(VLdA, 2);
1777 // Load the odd subregs.
1778 Ops.push_back(SDValue(VLdA, 1));
1779 Ops.push_back(Align);
1781 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1782 assert(isa<ConstantSDNode>(Inc.getNode()) &&
1783 "only constant post-increment update allowed for VLD3/4");
1785 Ops.push_back(Reg0);
1787 Ops.push_back(SDValue(VLdA, 0));
1788 Ops.push_back(Pred);
1789 Ops.push_back(Reg0);
1790 Ops.push_back(Chain);
1791 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
1792 Ops.data(), Ops.size());
1795 // Transfer memoperands.
1796 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1797 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1798 cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1803 // Extract out the subregisters.
1804 SDValue SuperReg = SDValue(VLd, 0);
1805 assert(ARM::dsub_7 == ARM::dsub_0+7 &&
1806 ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
1807 unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1808 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1809 ReplaceUses(SDValue(N, Vec),
1810 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1811 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1813 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1817 SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1818 const uint16_t *DOpcodes,
1819 const uint16_t *QOpcodes0,
1820 const uint16_t *QOpcodes1) {
1821 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1822 DebugLoc dl = N->getDebugLoc();
1824 SDValue MemAddr, Align;
1825 unsigned AddrOpIdx = isUpdating ? 1 : 2;
1826 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1827 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1830 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1831 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1833 SDValue Chain = N->getOperand(0);
1834 EVT VT = N->getOperand(Vec0Idx).getValueType();
1835 bool is64BitVector = VT.is64BitVector();
1836 Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
1838 unsigned OpcodeIndex;
1839 switch (VT.getSimpleVT().SimpleTy) {
1840 default: llvm_unreachable("unhandled vst type");
1841 // Double-register operations:
1842 case MVT::v8i8: OpcodeIndex = 0; break;
1843 case MVT::v4i16: OpcodeIndex = 1; break;
1845 case MVT::v2i32: OpcodeIndex = 2; break;
1846 case MVT::v1i64: OpcodeIndex = 3; break;
1847 // Quad-register operations:
1848 case MVT::v16i8: OpcodeIndex = 0; break;
1849 case MVT::v8i16: OpcodeIndex = 1; break;
1851 case MVT::v4i32: OpcodeIndex = 2; break;
1852 case MVT::v2i64: OpcodeIndex = 3;
1853 assert(NumVecs == 1 && "v2i64 type only supported for VST1");
1857 std::vector<EVT> ResTys;
1859 ResTys.push_back(MVT::i32);
1860 ResTys.push_back(MVT::Other);
1862 SDValue Pred = getAL(CurDAG);
1863 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1864 SmallVector<SDValue, 7> Ops;
1866 // Double registers and VST1/VST2 quad registers are directly supported.
1867 if (is64BitVector || NumVecs <= 2) {
1870 SrcReg = N->getOperand(Vec0Idx);
1871 } else if (is64BitVector) {
1872 // Form a REG_SEQUENCE to force register allocation.
1873 SDValue V0 = N->getOperand(Vec0Idx + 0);
1874 SDValue V1 = N->getOperand(Vec0Idx + 1);
1876 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
1878 SDValue V2 = N->getOperand(Vec0Idx + 2);
1879 // If it's a vst3, form a quad D-register and leave the last part as undef.
1881 SDValue V3 = (NumVecs == 3)
1882 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
1883 : N->getOperand(Vec0Idx + 3);
1884 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
1887 // Form a QQ register.
1888 SDValue Q0 = N->getOperand(Vec0Idx);
1889 SDValue Q1 = N->getOperand(Vec0Idx + 1);
1890 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
1893 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1894 QOpcodes0[OpcodeIndex]);
1895 Ops.push_back(MemAddr);
1896 Ops.push_back(Align);
1898 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1899 // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
1900 // case entirely when the rest are updated to that form, too.
1901 if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
1902 Opc = getVLDSTRegisterUpdateOpcode(Opc);
1903 // We use a VST1 for v1i64 even if the pseudo says vst2/3/4, so
1904 // check for that explicitly too. Horribly hacky, but temporary.
1905 if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) ||
1906 !isa<ConstantSDNode>(Inc.getNode()))
1907 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1909 Ops.push_back(SrcReg);
1910 Ops.push_back(Pred);
1911 Ops.push_back(Reg0);
1912 Ops.push_back(Chain);
1914 CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
1916 // Transfer memoperands.
1917 cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
1922 // Otherwise, quad registers are stored with two separate instructions,
1923 // where one stores the even registers and the other stores the odd registers.
1925 // Form the QQQQ REG_SEQUENCE.
1926 SDValue V0 = N->getOperand(Vec0Idx + 0);
1927 SDValue V1 = N->getOperand(Vec0Idx + 1);
1928 SDValue V2 = N->getOperand(Vec0Idx + 2);
1929 SDValue V3 = (NumVecs == 3)
1930 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
1931 : N->getOperand(Vec0Idx + 3);
1932 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
1934 // Store the even D registers. This is always an updating store, so that it
1935 // provides the address to the second store for the odd subregs.
1936 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
1937 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1938 MemAddr.getValueType(),
1939 MVT::Other, OpsA, 7);
1940 cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
1941 Chain = SDValue(VStA, 1);
1943 // Store the odd D registers.
1944 Ops.push_back(SDValue(VStA, 0));
1945 Ops.push_back(Align);
1947 SDValue Inc = N->getOperand(AddrOpIdx + 1);
1948 assert(isa<ConstantSDNode>(Inc.getNode()) &&
1949 "only constant post-increment update allowed for VST3/4");
1951 Ops.push_back(Reg0);
1953 Ops.push_back(RegSeq);
1954 Ops.push_back(Pred);
1955 Ops.push_back(Reg0);
1956 Ops.push_back(Chain);
1957 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
1958 Ops.data(), Ops.size());
1959 cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
1963 SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
1964 bool isUpdating, unsigned NumVecs,
1965 const uint16_t *DOpcodes,
1966 const uint16_t *QOpcodes) {
1967 assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
1968 DebugLoc dl = N->getDebugLoc();
1970 SDValue MemAddr, Align;
1971 unsigned AddrOpIdx = isUpdating ? 1 : 2;
1972 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1973 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1976 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1977 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1979 SDValue Chain = N->getOperand(0);
1981 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
1982 EVT VT = N->getOperand(Vec0Idx).getValueType();
1983 bool is64BitVector = VT.is64BitVector();
1985 unsigned Alignment = 0;
1987 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1988 unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
1989 if (Alignment > NumBytes)
1990 Alignment = NumBytes;
1991 if (Alignment < 8 && Alignment < NumBytes)
1993 // Alignment must be a power of two; make sure of that.
1994 Alignment = (Alignment & -Alignment);
1998 Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
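// For example, a vld2 lane of a v4i16 vector accesses NumVecs * 16/8 = 4
// bytes, so an alignment operand of 16 is clamped down to 4 here.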
2000 unsigned OpcodeIndex;
2001 switch (VT.getSimpleVT().SimpleTy) {
2002 default: llvm_unreachable("unhandled vld/vst lane type");
2003 // Double-register operations:
2004 case MVT::v8i8: OpcodeIndex = 0; break;
2005 case MVT::v4i16: OpcodeIndex = 1; break;
2007 case MVT::v2i32: OpcodeIndex = 2; break;
2008 // Quad-register operations:
2009 case MVT::v8i16: OpcodeIndex = 0; break;
2011 case MVT::v4i32: OpcodeIndex = 1; break;
2014 std::vector<EVT> ResTys;
2016 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2019 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2020 MVT::i64, ResTyElts));
2023 ResTys.push_back(MVT::i32);
2024 ResTys.push_back(MVT::Other);
2026 SDValue Pred = getAL(CurDAG);
2027 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2029 SmallVector<SDValue, 8> Ops;
2030 Ops.push_back(MemAddr);
2031 Ops.push_back(Align);
2033 SDValue Inc = N->getOperand(AddrOpIdx + 1);
2034 Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
2038 SDValue V0 = N->getOperand(Vec0Idx + 0);
2039 SDValue V1 = N->getOperand(Vec0Idx + 1);
2042 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2044 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2046 SDValue V2 = N->getOperand(Vec0Idx + 2);
2047 SDValue V3 = (NumVecs == 3)
2048 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2049 : N->getOperand(Vec0Idx + 3);
2051 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2053 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2055 Ops.push_back(SuperReg);
2056 Ops.push_back(getI32Imm(Lane));
2057 Ops.push_back(Pred);
2058 Ops.push_back(Reg0);
2059 Ops.push_back(Chain);
2061 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2062 QOpcodes[OpcodeIndex]);
2063 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
2064 Ops.data(), Ops.size());
2065 cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2069 // Extract the subregisters.
2070 SuperReg = SDValue(VLdLn, 0);
2071 assert(ARM::dsub_7 == ARM::dsub_0+7 &&
2072 ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
2073 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2074 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2075 ReplaceUses(SDValue(N, Vec),
2076 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2077 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2079 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2083 SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
2085 const uint16_t *Opcodes) {
2086 assert(NumVecs >= 2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2087 DebugLoc dl = N->getDebugLoc();
2089 SDValue MemAddr, Align;
2090 if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2093 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2094 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2096 SDValue Chain = N->getOperand(0);
2097 EVT VT = N->getValueType(0);
2099 unsigned Alignment = 0;
2101 Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2102 unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2103 if (Alignment > NumBytes)
2104 Alignment = NumBytes;
2105 if (Alignment < 8 && Alignment < NumBytes)
2107 // Alignment must be a power of two; make sure of that.
2108 Alignment = (Alignment & -Alignment);
2112 Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
2114 unsigned OpcodeIndex;
2115 switch (VT.getSimpleVT().SimpleTy) {
2116 default: llvm_unreachable("unhandled vld-dup type");
2117 case MVT::v8i8: OpcodeIndex = 0; break;
2118 case MVT::v4i16: OpcodeIndex = 1; break;
2120 case MVT::v2i32: OpcodeIndex = 2; break;
2123 SDValue Pred = getAL(CurDAG);
2124 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2126 unsigned Opc = Opcodes[OpcodeIndex];
2127 SmallVector<SDValue, 6> Ops;
2128 Ops.push_back(MemAddr);
2129 Ops.push_back(Align);
2131 // fixed-stride update instructions don't have an explicit writeback
2132 // operand. It's implicit in the opcode itself.
2133 SDValue Inc = N->getOperand(2);
2134 if (!isa<ConstantSDNode>(Inc.getNode()))
2136 // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2137 else if (NumVecs > 2)
2138 Ops.push_back(Reg0);
2140 Ops.push_back(Pred);
2141 Ops.push_back(Reg0);
2142 Ops.push_back(Chain);
2144 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2145 std::vector<EVT> ResTys;
2146 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts));
2148 ResTys.push_back(MVT::i32);
2149 ResTys.push_back(MVT::Other);
2151 CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
2152 cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2153 SuperReg = SDValue(VLdDup, 0);
2155 // Extract the subregisters.
2156 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
2157 unsigned SubIdx = ARM::dsub_0;
2158 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2159 ReplaceUses(SDValue(N, Vec),
2160 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2161 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2163 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2167 SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
2169 assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
2170 DebugLoc dl = N->getDebugLoc();
2171 EVT VT = N->getValueType(0);
2172 unsigned FirstTblReg = IsExt ? 2 : 1;
2174 // Form a REG_SEQUENCE to force register allocation.
2176 SDValue V0 = N->getOperand(FirstTblReg + 0);
2177 SDValue V1 = N->getOperand(FirstTblReg + 1);
2179 RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
2181 SDValue V2 = N->getOperand(FirstTblReg + 2);
2182 // If it's a vtbl3, form a quad D-register and leave the last part as undef.
2184 SDValue V3 = (NumVecs == 3)
2185 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2186 : N->getOperand(FirstTblReg + 3);
2187 RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2190 SmallVector<SDValue, 6> Ops;
2192 Ops.push_back(N->getOperand(1));
2193 Ops.push_back(RegSeq);
2194 Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2195 Ops.push_back(getAL(CurDAG)); // predicate
2196 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2197 return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
2200 SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
2202 if (!Subtarget->hasV6T2Ops())
2205 unsigned Opc = isSigned
2206 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2207 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2209 // For unsigned extracts, check for a shift right and mask
2210 unsigned And_imm = 0;
2211 if (N->getOpcode() == ISD::AND) {
2212 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2214 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2215 if (And_imm & (And_imm + 1))
2218 unsigned Srl_imm = 0;
2219 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2221 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2223 // Note: The width operand is encoded as width-1.
2224 unsigned Width = CountTrailingOnes_32(And_imm) - 1;
2225 unsigned LSB = Srl_imm;
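// For example, for (and (srl x, 7), 0x1f): And_imm = 0x1f gives Width = 4
// (a 5-bit field, encoded as width-1) and LSB = 7, selecting a ubfx with
// lsb 7 and width 5.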
2227 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2229 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2230 // It's cheaper to use a right shift to extract the top bits.
2231 if (Subtarget->isThumb()) {
2232 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2233 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2234 CurDAG->getTargetConstant(LSB, MVT::i32),
2235 getAL(CurDAG), Reg0, Reg0 };
2236 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
2239 // ARM models shift instructions as MOVsi with shifter operand.
2240 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2242 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB),
2244 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2245 getAL(CurDAG), Reg0, Reg0 };
2246 return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5);
2249 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2250 CurDAG->getTargetConstant(LSB, MVT::i32),
2251 CurDAG->getTargetConstant(Width, MVT::i32),
2252 getAL(CurDAG), Reg0 };
2253 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
2259 // Otherwise, we're looking for a shift of a shift
2260 unsigned Shl_imm = 0;
2261 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2262 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2263 unsigned Srl_imm = 0;
2264 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2265 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2266 // Note: The width operand is encoded as width-1.
2267 unsigned Width = 32 - Srl_imm - 1;
2268 int LSB = Srl_imm - Shl_imm;
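// For example, (srl (shl x, 8), 20) extracts bits [23:12] of x:
// Shl_imm = 8 and Srl_imm = 20 give Width = 11 (a 12-bit field, encoded as
// width-1) and LSB = 12.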
2271 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2272 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2273 CurDAG->getTargetConstant(LSB, MVT::i32),
2274 CurDAG->getTargetConstant(Width, MVT::i32),
2275 getAL(CurDAG), Reg0 };
2276 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
2282 SDNode *ARMDAGToDAGISel::
2283 SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
2284 ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
2287 if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
2288 unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
2289 unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
2292 case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
2293 case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
2294 case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
2295 case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
2297 llvm_unreachable("Unknown so_reg opcode!");
2300 CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
2301 SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
2302 SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag };
2303 return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6);
2308 SDNode *ARMDAGToDAGISel::
2309 SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
2310 ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
2314 if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
2315 SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
2316 SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag };
2317 return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6);
2320 if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
2321 SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
2322 SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
2323 return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7);
2328 SDNode *ARMDAGToDAGISel::
2329 SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
2330 ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
2331 ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
2336 unsigned TrueImm = T->getZExtValue();
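// Prefer the cheapest conditional materialization of the constant: a Thumb2
// modified immediate (t2MOVCCi), then any 16-bit immediate (t2MOVCCi16), then
// an inverted modified immediate (t2MVNCCi); only a single-use constant is
// worth the full t2MOVCCi32imm pseudo.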
2337 if (is_t2_so_imm(TrueImm)) {
2338 Opc = ARM::t2MOVCCi;
2339 } else if (TrueImm <= 0xffff) {
2340 Opc = ARM::t2MOVCCi16;
2341 } else if (is_t2_so_imm_not(TrueImm)) {
2343 Opc = ARM::t2MVNCCi;
2344 } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) {
2346 Opc = ARM::t2MOVCCi32imm;
2350 SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
2351 SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
2352 SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
2353 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
2359 SDNode *ARMDAGToDAGISel::
2360 SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
2361 ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
2362 ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
2367 unsigned TrueImm = T->getZExtValue();
2368 bool isSoImm = is_so_imm(TrueImm);
2371 } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) {
2372 Opc = ARM::MOVCCi16;
2373 } else if (is_so_imm_not(TrueImm)) {
2376 } else if (TrueVal.getNode()->hasOneUse() &&
2377 (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) {
2379 Opc = ARM::MOVCCi32imm;
2383 SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
2384 SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
2385 SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
2386 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
2392 SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
2393 EVT VT = N->getValueType(0);
2394 SDValue FalseVal = N->getOperand(0);
2395 SDValue TrueVal = N->getOperand(1);
2396 SDValue CC = N->getOperand(2);
2397 SDValue CCR = N->getOperand(3);
2398 SDValue InFlag = N->getOperand(4);
2399 assert(CC.getOpcode() == ISD::Constant);
2400 assert(CCR.getOpcode() == ISD::Register);
2401 ARMCC::CondCodes CCVal =
2402 (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue();
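// Try progressively more specific encodings for the true operand: first a
// shifter-operand form, then an immediate form, each also with the operands
// swapped and the condition inverted; if none of those match, fall through to
// a plain register-to-register conditional move below.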
2404 if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
2405 // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
2406 // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
2407 // Pattern complexity = 18 cost = 1 size = 0
2408 if (Subtarget->isThumb()) {
2409 SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
2410 CCVal, CCR, InFlag);
2412 Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal,
2413 ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
2417 SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal,
2418 CCVal, CCR, InFlag);
2420 Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal,
2421 ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
2426 // Pattern: (ARMcmov:i32 GPR:i32:$false,
2427 // (imm:i32)<<P:Pred_so_imm>>:$true,
2429 // Emits: (MOVCCi:i32 GPR:i32:$false,
2430 // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
2431 // Pattern complexity = 10 cost = 1 size = 0
2432 if (Subtarget->isThumb()) {
2433 SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal,
2434 CCVal, CCR, InFlag);
2436 Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal,
2437 ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
2441 SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal,
2442 CCVal, CCR, InFlag);
2444 Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal,
2445 ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
2451 // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
2452 // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
2453 // Pattern complexity = 6 cost = 1 size = 0
2455 // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
2456 // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
2457 // Pattern complexity = 6 cost = 11 size = 0
2459 // Also VMOVScc and VMOVDcc.
2460 SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32);
2461 SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
2463 switch (VT.getSimpleVT().SimpleTy) {
2464 default: llvm_unreachable("Illegal conditional move type!");
2466 Opc = Subtarget->isThumb()
2467 ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
2477 return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
2480 /// Target-specific DAG combining for ISD::XOR.
2481 /// Target-independent combining lowers SELECT_CC nodes of the form
2482 /// select_cc setg[ge] X, 0, X, -X
2483 /// select_cc setgt X, -1, X, -X
2484 /// select_cc setl[te] X, 0, -X, X
2485 /// select_cc setlt X, 1, -X, X
2486 /// which represent integer ABS into:
2487 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2488 /// ARM instruction selection detects the latter pattern and matches it to
2489 /// an ARM::ABS or ARM::t2ABS machine node.
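/// For example, with X = -5: Y = sra(-5, 31) = -1, add(X, Y) = -6, and
/// xor(-6, -1) = 5 = |X|.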
2490 SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
2491 SDValue XORSrc0 = N->getOperand(0);
2492 SDValue XORSrc1 = N->getOperand(1);
2493 EVT VT = N->getValueType(0);
2495 if (Subtarget->isThumb1Only())
2498 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2501 SDValue ADDSrc0 = XORSrc0.getOperand(0);
2502 SDValue ADDSrc1 = XORSrc0.getOperand(1);
2503 SDValue SRASrc0 = XORSrc1.getOperand(0);
2504 SDValue SRASrc1 = XORSrc1.getOperand(1);
2505 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2506 EVT XType = SRASrc0.getValueType();
2507 unsigned Size = XType.getSizeInBits() - 1;
2509 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2510 XType.isInteger() && SRAConstant != NULL &&
2511 Size == SRAConstant->getZExtValue()) {
2512 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2513 return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2519 SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2520 // The only time a CONCAT_VECTORS operation can have legal types is when
2521 // two 64-bit vectors are concatenated to a 128-bit vector.
2522 EVT VT = N->getValueType(0);
2523 if (!VT.is128BitVector() || N->getNumOperands() != 2)
2524 llvm_unreachable("unexpected CONCAT_VECTORS");
2525 return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
2528 SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
2529 SmallVector<SDValue, 6> Ops;
2530 Ops.push_back(Node->getOperand(1)); // Ptr
2531 Ops.push_back(Node->getOperand(2)); // Low part of Val1
2532 Ops.push_back(Node->getOperand(3)); // High part of Val1
2533 if (Opc == ARM::ATOMCMPXCHG6432) {
2534 Ops.push_back(Node->getOperand(4)); // Low part of Val2
2535 Ops.push_back(Node->getOperand(5)); // High part of Val2
2537 Ops.push_back(Node->getOperand(0)); // Chain
2538 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2539 MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
2540 SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
2541 MVT::i32, MVT::i32, MVT::Other,
2542 Ops.data(), Ops.size());
2543 cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
2547 SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
2548 DebugLoc dl = N->getDebugLoc();
2550 if (N->isMachineOpcode())
2551 return NULL; // Already selected.
2553 switch (N->getOpcode()) {
2556 // Select special operations if XOR node forms integer ABS pattern
2557 SDNode *ResNode = SelectABSOp(N);
2560 // Other cases are autogenerated.
2563 case ISD::Constant: {
2564 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2566 if (Subtarget->hasThumb2())
2567 // Thumb2-aware targets have the MOVT instruction, so all immediates can
2568 // be done with MOV + MOVT, at worst.
2571 if (Subtarget->isThumb()) {
2572 UseCP = (Val > 255 && // MOV
2573 ~Val > 255 && // MOV + MVN
2574 !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL
2576 UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
2577 ARM_AM::getSOImmVal(~Val) == -1 && // MVN
2578 !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs.
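// For example, on a pre-v6T2 ARM target a value such as 0x12345678 is not a
// rotated 8-bit immediate, nor is its complement, nor can it be built from
// two such immediates, so it is loaded from the constant pool below.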
2583 CurDAG->getTargetConstantPool(ConstantInt::get(
2584 Type::getInt32Ty(*CurDAG->getContext()), Val),
2585 TLI.getPointerTy());
2588 if (Subtarget->isThumb1Only()) {
2589 SDValue Pred = getAL(CurDAG);
2590 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2591 SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2592 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2597 CurDAG->getTargetConstant(0, MVT::i32),
2599 CurDAG->getRegister(0, MVT::i32),
2600 CurDAG->getEntryNode()
2602 ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2605 ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
2609 // Other cases are autogenerated.
2612 case ISD::FrameIndex: {
2613 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2614 int FI = cast<FrameIndexSDNode>(N)->getIndex();
2615 SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
2616 if (Subtarget->isThumb1Only()) {
2617 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
2618 getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
2619 return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4);
2621 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2622 ARM::t2ADDri : ARM::ADDri);
2623 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
2624 getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
2625 CurDAG->getRegister(0, MVT::i32) };
2626 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
2630 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2634 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
2638 if (Subtarget->isThumb1Only())
2640 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2641 unsigned RHSV = C->getZExtValue();
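// A multiply by 2^n+1 can be selected as x + (x << n) (an ADD with an lsl
// shifter operand), and a multiply by 2^n-1 as (x << n) - x (an RSB with an
// lsl shifter operand).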
2643 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2644 unsigned ShImm = Log2_32(RHSV-1);
2647 SDValue V = N->getOperand(0);
2648 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2649 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
2650 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2651 if (Subtarget->isThumb()) {
2652 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
2653 return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6);
2655 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
2656 return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7);
2659 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2660 unsigned ShImm = Log2_32(RHSV+1);
2663 SDValue V = N->getOperand(0);
2664 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2665 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
2666 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2667 if (Subtarget->isThumb()) {
2668 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
2669 return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
2671 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
2672 return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7);
2678 // Check for unsigned bitfield extract
2679 if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2682 // (and (or x, c2), c1), where the top 16 bits of c1 and c2 match, the lower
2683 // 16 bits of c1 are 0xffff, and the lower 16 bits of c2 are 0. That is, the
2684 // top 16 bits are entirely contributed by c2 and the lower 16 bits entirely
2685 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2686 // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
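// For example, with c2 = 0xABCD0000 and c1 = 0xABCDFFFF, the node
// (and (or x, 0xABCD0000), 0xABCDFFFF) keeps the low half of x and forces the
// high half to 0xABCD, i.e. MOVT x, #0xABCD.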
2687 EVT VT = N->getValueType(0);
2690 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2692 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2695 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2696 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2699 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2700 SDValue N2 = N0.getOperand(1);
2701 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2704 unsigned N1CVal = N1C->getZExtValue();
2705 unsigned N2CVal = N2C->getZExtValue();
2706 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2707 (N1CVal & 0xffffU) == 0xffffU &&
2708 (N2CVal & 0xffffU) == 0x0U) {
2709 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2711 SDValue Ops[] = { N0.getOperand(0), Imm16,
2712 getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
2713 return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4);
2718 case ARMISD::VMOVRRD:
2719 return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
2720 N->getOperand(0), getAL(CurDAG),
2721 CurDAG->getRegister(0, MVT::i32));
2722 case ISD::UMUL_LOHI: {
2723 if (Subtarget->isThumb1Only())
2725 if (Subtarget->isThumb()) {
2726 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2727 getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
2728 CurDAG->getRegister(0, MVT::i32) };
2729 return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
2731 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2732 getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
2733 CurDAG->getRegister(0, MVT::i32) };
2734 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2735 ARM::UMULL : ARM::UMULLv5,
2736 dl, MVT::i32, MVT::i32, Ops, 5);
2739 case ISD::SMUL_LOHI: {
2740 if (Subtarget->isThumb1Only())
2742 if (Subtarget->isThumb()) {
2743 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2744 getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
2745 return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
2747 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2748 getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
2749 CurDAG->getRegister(0, MVT::i32) };
2750 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2751 ARM::SMULL : ARM::SMULLv5,
2752 dl, MVT::i32, MVT::i32, Ops, 5);
2755 case ARMISD::UMLAL:{
2756 if (Subtarget->isThumb()) {
2757 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2758 N->getOperand(3), getAL(CurDAG),
2759 CurDAG->getRegister(0, MVT::i32)};
2760 return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
2762 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2763 N->getOperand(3), getAL(CurDAG),
2764 CurDAG->getRegister(0, MVT::i32),
2765 CurDAG->getRegister(0, MVT::i32) };
2766 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2767 ARM::UMLAL : ARM::UMLALv5,
2768 dl, MVT::i32, MVT::i32, Ops, 7);
2771 case ARMISD::SMLAL:{
2772 if (Subtarget->isThumb()) {
2773 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2774 N->getOperand(3), getAL(CurDAG),
2775 CurDAG->getRegister(0, MVT::i32)};
2776 return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
2778 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2779 N->getOperand(3), getAL(CurDAG),
2780 CurDAG->getRegister(0, MVT::i32),
2781 CurDAG->getRegister(0, MVT::i32) };
2782 return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2783 ARM::SMLAL : ARM::SMLALv5,
2784 dl, MVT::i32, MVT::i32, Ops, 7);
2788 SDNode *ResNode = 0;
2789 if (Subtarget->isThumb() && Subtarget->hasThumb2())
2790 ResNode = SelectT2IndexedLoad(N);
2792 ResNode = SelectARMIndexedLoad(N);
2795 // Other cases are autogenerated.
2798 case ARMISD::BRCOND: {
2799 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2800 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2801 // Pattern complexity = 6 cost = 1 size = 0
2803 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2804 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2805 // Pattern complexity = 6 cost = 1 size = 0
2807 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2808 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2809 // Pattern complexity = 6 cost = 1 size = 0
2811 unsigned Opc = Subtarget->isThumb() ?
2812 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2813 SDValue Chain = N->getOperand(0);
2814 SDValue N1 = N->getOperand(1);
2815 SDValue N2 = N->getOperand(2);
2816 SDValue N3 = N->getOperand(3);
2817 SDValue InFlag = N->getOperand(4);
2818 assert(N1.getOpcode() == ISD::BasicBlock);
2819 assert(N2.getOpcode() == ISD::Constant);
2820 assert(N3.getOpcode() == ISD::Register);
2822 SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
2823 cast<ConstantSDNode>(N2)->getZExtValue()),
2825 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
2826 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
2828 Chain = SDValue(ResNode, 0);
2829 if (N->getNumValues() == 2) {
2830 InFlag = SDValue(ResNode, 1);
2831 ReplaceUses(SDValue(N, 1), InFlag);
2833 ReplaceUses(SDValue(N, 0),
2834 SDValue(Chain.getNode(), Chain.getResNo()));
2838 return SelectCMOVOp(N);
2839 case ARMISD::VZIP: {
2841 EVT VT = N->getValueType(0);
2842 switch (VT.getSimpleVT().SimpleTy) {
2843 default: return NULL;
2844 case MVT::v8i8: Opc = ARM::VZIPd8; break;
2845 case MVT::v4i16: Opc = ARM::VZIPd16; break;
2847 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
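// (A D register holds only two 32-bit lanes, so interleaving the two inputs
// is the same permutation as transposing them.)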
2848 case MVT::v2i32: Opc = ARM::VTRNd32; break;
2849 case MVT::v16i8: Opc = ARM::VZIPq8; break;
2850 case MVT::v8i16: Opc = ARM::VZIPq16; break;
2852 case MVT::v4i32: Opc = ARM::VZIPq32; break;
2854 SDValue Pred = getAL(CurDAG);
2855 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2856 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2857 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
2859 case ARMISD::VUZP: {
2861 EVT VT = N->getValueType(0);
2862 switch (VT.getSimpleVT().SimpleTy) {
2863 default: return NULL;
2864 case MVT::v8i8: Opc = ARM::VUZPd8; break;
2865 case MVT::v4i16: Opc = ARM::VUZPd16; break;
2867 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
2868 case MVT::v2i32: Opc = ARM::VTRNd32; break;
2869 case MVT::v16i8: Opc = ARM::VUZPq8; break;
2870 case MVT::v8i16: Opc = ARM::VUZPq16; break;
2872 case MVT::v4i32: Opc = ARM::VUZPq32; break;
2874 SDValue Pred = getAL(CurDAG);
2875 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2876 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2877 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
2879 case ARMISD::VTRN: {
2881 EVT VT = N->getValueType(0);
2882 switch (VT.getSimpleVT().SimpleTy) {
2883 default: return NULL;
2884 case MVT::v8i8: Opc = ARM::VTRNd8; break;
2885 case MVT::v4i16: Opc = ARM::VTRNd16; break;
2887 case MVT::v2i32: Opc = ARM::VTRNd32; break;
2888 case MVT::v16i8: Opc = ARM::VTRNq8; break;
2889 case MVT::v8i16: Opc = ARM::VTRNq16; break;
2891 case MVT::v4i32: Opc = ARM::VTRNq32; break;
2893 SDValue Pred = getAL(CurDAG);
2894 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2895 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2896 return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
2898 case ARMISD::BUILD_VECTOR: {
2899 EVT VecVT = N->getValueType(0);
2900 EVT EltVT = VecVT.getVectorElementType();
2901 unsigned NumElts = VecVT.getVectorNumElements();
2902 if (EltVT == MVT::f64) {
2903 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
2904 return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2906 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
2908 return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
2909 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
2910 return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
2911 N->getOperand(2), N->getOperand(3));
2914 case ARMISD::VLD2DUP: {
2915 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
2917 return SelectVLDDup(N, false, 2, Opcodes);
2920 case ARMISD::VLD3DUP: {
2921 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
2922 ARM::VLD3DUPd16Pseudo,
2923 ARM::VLD3DUPd32Pseudo };
2924 return SelectVLDDup(N, false, 3, Opcodes);
2927 case ARMISD::VLD4DUP: {
2928 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
2929 ARM::VLD4DUPd16Pseudo,
2930 ARM::VLD4DUPd32Pseudo };
2931 return SelectVLDDup(N, false, 4, Opcodes);
2934 case ARMISD::VLD2DUP_UPD: {
2935 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
2936 ARM::VLD2DUPd16wb_fixed,
2937 ARM::VLD2DUPd32wb_fixed };
2938 return SelectVLDDup(N, true, 2, Opcodes);
2941 case ARMISD::VLD3DUP_UPD: {
2942 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
2943 ARM::VLD3DUPd16Pseudo_UPD,
2944 ARM::VLD3DUPd32Pseudo_UPD };
2945 return SelectVLDDup(N, true, 3, Opcodes);
2948 case ARMISD::VLD4DUP_UPD: {
2949 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
2950 ARM::VLD4DUPd16Pseudo_UPD,
2951 ARM::VLD4DUPd32Pseudo_UPD };
2952 return SelectVLDDup(N, true, 4, Opcodes);
2955 case ARMISD::VLD1_UPD: {
2956 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
2957 ARM::VLD1d16wb_fixed,
2958 ARM::VLD1d32wb_fixed,
2959 ARM::VLD1d64wb_fixed };
2960 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
2961 ARM::VLD1q16wb_fixed,
2962 ARM::VLD1q32wb_fixed,
2963 ARM::VLD1q64wb_fixed };
2964 return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
2967 case ARMISD::VLD2_UPD: {
2968 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
2969 ARM::VLD2d16wb_fixed,
2970 ARM::VLD2d32wb_fixed,
2971 ARM::VLD1q64wb_fixed};
2972 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
2973 ARM::VLD2q16PseudoWB_fixed,
2974 ARM::VLD2q32PseudoWB_fixed };
2975 return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
2978 case ARMISD::VLD3_UPD: {
2979 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
2980 ARM::VLD3d16Pseudo_UPD,
2981 ARM::VLD3d32Pseudo_UPD,
2982 ARM::VLD1q64wb_fixed};
2983 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
2984 ARM::VLD3q16Pseudo_UPD,
2985 ARM::VLD3q32Pseudo_UPD };
2986 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
2987 ARM::VLD3q16oddPseudo_UPD,
2988 ARM::VLD3q32oddPseudo_UPD };
2989 return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
2992 case ARMISD::VLD4_UPD: {
2993 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
2994 ARM::VLD4d16Pseudo_UPD,
2995 ARM::VLD4d32Pseudo_UPD,
2996 ARM::VLD1q64wb_fixed};
2997 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
2998 ARM::VLD4q16Pseudo_UPD,
2999 ARM::VLD4q32Pseudo_UPD };
3000 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3001 ARM::VLD4q16oddPseudo_UPD,
3002 ARM::VLD4q32oddPseudo_UPD };
3003 return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3006 case ARMISD::VLD2LN_UPD: {
3007 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3008 ARM::VLD2LNd16Pseudo_UPD,
3009 ARM::VLD2LNd32Pseudo_UPD };
3010 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3011 ARM::VLD2LNq32Pseudo_UPD };
3012 return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3015 case ARMISD::VLD3LN_UPD: {
3016 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3017 ARM::VLD3LNd16Pseudo_UPD,
3018 ARM::VLD3LNd32Pseudo_UPD };
3019 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3020 ARM::VLD3LNq32Pseudo_UPD };
3021 return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3024 case ARMISD::VLD4LN_UPD: {
3025 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3026 ARM::VLD4LNd16Pseudo_UPD,
3027 ARM::VLD4LNd32Pseudo_UPD };
3028 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3029 ARM::VLD4LNq32Pseudo_UPD };
3030 return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3033 case ARMISD::VST1_UPD: {
3034 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3035 ARM::VST1d16wb_fixed,
3036 ARM::VST1d32wb_fixed,
3037 ARM::VST1d64wb_fixed };
3038 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3039 ARM::VST1q16wb_fixed,
3040 ARM::VST1q32wb_fixed,
3041 ARM::VST1q64wb_fixed };
3042 return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
3045 case ARMISD::VST2_UPD: {
3046 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3047 ARM::VST2d16wb_fixed,
3048 ARM::VST2d32wb_fixed,
3049 ARM::VST1q64wb_fixed};
3050 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3051 ARM::VST2q16PseudoWB_fixed,
3052 ARM::VST2q32PseudoWB_fixed };
3053 return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
3056 case ARMISD::VST3_UPD: {
3057 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3058 ARM::VST3d16Pseudo_UPD,
3059 ARM::VST3d32Pseudo_UPD,
3060 ARM::VST1d64TPseudoWB_fixed};
3061 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3062 ARM::VST3q16Pseudo_UPD,
3063 ARM::VST3q32Pseudo_UPD };
3064 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3065 ARM::VST3q16oddPseudo_UPD,
3066 ARM::VST3q32oddPseudo_UPD };
3067 return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3070 case ARMISD::VST4_UPD: {
3071 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3072 ARM::VST4d16Pseudo_UPD,
3073 ARM::VST4d32Pseudo_UPD,
3074 ARM::VST1d64QPseudoWB_fixed};
3075 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3076 ARM::VST4q16Pseudo_UPD,
3077 ARM::VST4q32Pseudo_UPD };
3078 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3079 ARM::VST4q16oddPseudo_UPD,
3080 ARM::VST4q32oddPseudo_UPD };
3081 return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3084 case ARMISD::VST2LN_UPD: {
3085 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3086 ARM::VST2LNd16Pseudo_UPD,
3087 ARM::VST2LNd32Pseudo_UPD };
3088 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3089 ARM::VST2LNq32Pseudo_UPD };
3090 return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3093 case ARMISD::VST3LN_UPD: {
3094 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3095 ARM::VST3LNd16Pseudo_UPD,
3096 ARM::VST3LNd32Pseudo_UPD };
3097 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3098 ARM::VST3LNq32Pseudo_UPD };
3099 return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3102 case ARMISD::VST4LN_UPD: {
3103 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3104 ARM::VST4LNd16Pseudo_UPD,
3105 ARM::VST4LNd32Pseudo_UPD };
3106 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3107 ARM::VST4LNq32Pseudo_UPD };
3108 return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3111 case ISD::INTRINSIC_VOID:
3112 case ISD::INTRINSIC_W_CHAIN: {
3113 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3118 case Intrinsic::arm_ldrexd: {
3119 SDValue MemAddr = N->getOperand(2);
3120 DebugLoc dl = N->getDebugLoc();
3121 SDValue Chain = N->getOperand(0);
3123 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3124 unsigned NewOpc = isThumb ? ARM::t2LDREXD : ARM::LDREXD;
3126 // arm_ldrexd returns an i64 value in {i32, i32}.
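// Note: t2LDREXD yields the two 32-bit halves as separate results, while ARM
// LDREXD yields a single untyped GPRPair from which gsub_0/gsub_1 are
// extracted below; this is why the glue result index and the extraction code
// depend on isThumb.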
3127 std::vector<EVT> ResTys;
3129 ResTys.push_back(MVT::i32);
3130 ResTys.push_back(MVT::i32);
3132 ResTys.push_back(MVT::Untyped);
3133 ResTys.push_back(MVT::Other);
3135 // Place arguments in the right order.
3136 SmallVector<SDValue, 7> Ops;
3137 Ops.push_back(MemAddr);
3138 Ops.push_back(getAL(CurDAG));
3139 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3140 Ops.push_back(Chain);
3141 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
3143 // Transfer memoperands.
3144 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3145 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3146 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3149 SDValue Glue = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3150 if (!SDValue(N, 0).use_empty()) {
3153 Result = SDValue(Ld, 0);
3155 SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
3156 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3157 dl, MVT::i32, MVT::Glue, SDValue(Ld, 0), SubRegIdx, Glue);
3158 Result = SDValue(ResNode,0);
3159 Glue = Result.getValue(1);
3161 ReplaceUses(SDValue(N, 0), Result);
3163 if (!SDValue(N, 1).use_empty()) {
3166 Result = SDValue(Ld, 1);
3168 SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
3169 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3170 dl, MVT::i32, MVT::Glue, SDValue(Ld, 0), SubRegIdx, Glue);
3171 Result = SDValue(ResNode,0);
3172 Glue = Result.getValue(1);
3174 ReplaceUses(SDValue(N, 1), Result);
3176 ReplaceUses(SDValue(N, 2), Glue);
3180 case Intrinsic::arm_strexd: {
3181 DebugLoc dl = N->getDebugLoc();
3182 SDValue Chain = N->getOperand(0);
3183 SDValue Val0 = N->getOperand(2);
3184 SDValue Val1 = N->getOperand(3);
3185 SDValue MemAddr = N->getOperand(4);
3187 // Store-exclusive double returns an i32 value which is the status of
3188 // the issued store.
3189 std::vector<EVT> ResTys;
3190 ResTys.push_back(MVT::i32);
3191 ResTys.push_back(MVT::Other);
3193 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3194 // Place arguments in the right order.
3195 SmallVector<SDValue, 7> Ops;
3197 Ops.push_back(Val0);
3198 Ops.push_back(Val1);
3200 // arm_strexd uses GPRPair.
3201 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3202 Ops.push_back(MemAddr);
3203 Ops.push_back(getAL(CurDAG));
3204 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3205 Ops.push_back(Chain);
3207 unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD;
3209 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
3211 // Transfer memoperands.
3212 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3213 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3214 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3219 case Intrinsic::arm_neon_vld1: {
3220 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3221 ARM::VLD1d32, ARM::VLD1d64 };
3222 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3223 ARM::VLD1q32, ARM::VLD1q64};
3224 return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
3227 case Intrinsic::arm_neon_vld2: {
3228 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3229 ARM::VLD2d32, ARM::VLD1q64 };
3230 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3231 ARM::VLD2q32Pseudo };
3232 return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
3235 case Intrinsic::arm_neon_vld3: {
3236 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3239 ARM::VLD1d64TPseudo };
3240 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3241 ARM::VLD3q16Pseudo_UPD,
3242 ARM::VLD3q32Pseudo_UPD };
3243 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3244 ARM::VLD3q16oddPseudo,
3245 ARM::VLD3q32oddPseudo };
3246 return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3249 case Intrinsic::arm_neon_vld4: {
3250 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3253 ARM::VLD1d64QPseudo };
3254 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3255 ARM::VLD4q16Pseudo_UPD,
3256 ARM::VLD4q32Pseudo_UPD };
3257 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3258 ARM::VLD4q16oddPseudo,
3259 ARM::VLD4q32oddPseudo };
3260 return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3263 case Intrinsic::arm_neon_vld2lane: {
3264 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3265 ARM::VLD2LNd16Pseudo,
3266 ARM::VLD2LNd32Pseudo };
3267 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3268 ARM::VLD2LNq32Pseudo };
3269 return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3272 case Intrinsic::arm_neon_vld3lane: {
3273 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3274 ARM::VLD3LNd16Pseudo,
3275 ARM::VLD3LNd32Pseudo };
3276 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3277 ARM::VLD3LNq32Pseudo };
3278 return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3281 case Intrinsic::arm_neon_vld4lane: {
3282 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3283 ARM::VLD4LNd16Pseudo,
3284 ARM::VLD4LNd32Pseudo };
3285 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3286 ARM::VLD4LNq32Pseudo };
3287 return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3290 case Intrinsic::arm_neon_vst1: {
3291 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3292 ARM::VST1d32, ARM::VST1d64 };
3293 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3294 ARM::VST1q32, ARM::VST1q64 };
3295 return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
3298 case Intrinsic::arm_neon_vst2: {
3299 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3300 ARM::VST2d32, ARM::VST1q64 };
3301 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3302 ARM::VST2q32Pseudo };
3303 return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
3306 case Intrinsic::arm_neon_vst3: {
3307 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3310 ARM::VST1d64TPseudo };
3311 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3312 ARM::VST3q16Pseudo_UPD,
3313 ARM::VST3q32Pseudo_UPD };
3314 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3315 ARM::VST3q16oddPseudo,
3316 ARM::VST3q32oddPseudo };
3317 return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3320 case Intrinsic::arm_neon_vst4: {
3321 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3324 ARM::VST1d64QPseudo };
3325 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3326 ARM::VST4q16Pseudo_UPD,
3327 ARM::VST4q32Pseudo_UPD };
3328 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3329 ARM::VST4q16oddPseudo,
3330 ARM::VST4q32oddPseudo };
3331 return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3334 case Intrinsic::arm_neon_vst2lane: {
3335 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3336 ARM::VST2LNd16Pseudo,
3337 ARM::VST2LNd32Pseudo };
3338 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3339 ARM::VST2LNq32Pseudo };
3340 return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3343 case Intrinsic::arm_neon_vst3lane: {
3344 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3345 ARM::VST3LNd16Pseudo,
3346 ARM::VST3LNd32Pseudo };
3347 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3348 ARM::VST3LNq32Pseudo };
3349 return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3352 case Intrinsic::arm_neon_vst4lane: {
3353 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3354 ARM::VST4LNd16Pseudo,
3355 ARM::VST4LNd32Pseudo };
3356 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3357 ARM::VST4LNq32Pseudo };
3358 return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3364 case ISD::INTRINSIC_WO_CHAIN: {
3365 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3370 case Intrinsic::arm_neon_vtbl2:
3371 return SelectVTBL(N, false, 2, ARM::VTBL2);
3372 case Intrinsic::arm_neon_vtbl3:
3373 return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3374 case Intrinsic::arm_neon_vtbl4:
3375 return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3377 case Intrinsic::arm_neon_vtbx2:
3378 return SelectVTBL(N, true, 2, ARM::VTBX2);
3379 case Intrinsic::arm_neon_vtbx3:
3380 return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3381 case Intrinsic::arm_neon_vtbx4:
3382 return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3387 case ARMISD::VTBL1: {
3388 DebugLoc dl = N->getDebugLoc();
3389 EVT VT = N->getValueType(0);
3390 SmallVector<SDValue, 6> Ops;
3392 Ops.push_back(N->getOperand(0));
3393 Ops.push_back(N->getOperand(1));
3394 Ops.push_back(getAL(CurDAG)); // Predicate
3395 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3396 return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size());
3398 case ARMISD::VTBL2: {
3399 DebugLoc dl = N->getDebugLoc();
3400 EVT VT = N->getValueType(0);
3402 // Form a REG_SEQUENCE to force register allocation.
3403 SDValue V0 = N->getOperand(0);
3404 SDValue V1 = N->getOperand(1);
3405 SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
3407 SmallVector<SDValue, 6> Ops;
3408 Ops.push_back(RegSeq);
3409 Ops.push_back(N->getOperand(2));
3410 Ops.push_back(getAL(CurDAG)); // Predicate
3411 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3412 return CurDAG->getMachineNode(ARM::VTBL2, dl, VT,
3413 Ops.data(), Ops.size());
3416 case ISD::CONCAT_VECTORS:
3417 return SelectConcatVector(N);
3419 case ARMISD::ATOMOR64_DAG:
3420 return SelectAtomic64(N, ARM::ATOMOR6432);
3421 case ARMISD::ATOMXOR64_DAG:
3422 return SelectAtomic64(N, ARM::ATOMXOR6432);
3423 case ARMISD::ATOMADD64_DAG:
3424 return SelectAtomic64(N, ARM::ATOMADD6432);
3425 case ARMISD::ATOMSUB64_DAG:
3426 return SelectAtomic64(N, ARM::ATOMSUB6432);
3427 case ARMISD::ATOMNAND64_DAG:
3428 return SelectAtomic64(N, ARM::ATOMNAND6432);
3429 case ARMISD::ATOMAND64_DAG:
3430 return SelectAtomic64(N, ARM::ATOMAND6432);
3431 case ARMISD::ATOMSWAP64_DAG:
3432 return SelectAtomic64(N, ARM::ATOMSWAP6432);
3433 case ARMISD::ATOMCMPXCHG64_DAG:
3434 return SelectAtomic64(N, ARM::ATOMCMPXCHG6432);
3436 case ARMISD::ATOMMIN64_DAG:
3437 return SelectAtomic64(N, ARM::ATOMMIN6432);
3438 case ARMISD::ATOMUMIN64_DAG:
3439 return SelectAtomic64(N, ARM::ATOMUMIN6432);
3440 case ARMISD::ATOMMAX64_DAG:
3441 return SelectAtomic64(N, ARM::ATOMMAX6432);
3442 case ARMISD::ATOMUMAX64_DAG:
3443 return SelectAtomic64(N, ARM::ATOMUMAX6432);
3446 return SelectCode(N);
3449 bool ARMDAGToDAGISel::
3450 SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
3451 std::vector<SDValue> &OutOps) {
3452 assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
3453 // Require the address to be in a register. That is safe for all ARM
3454 // variants and it is hard to do anything much smarter without knowing
3455 // how the operand is used.
3456 OutOps.push_back(Op);
3460 /// createARMISelDag - This pass converts a legalized DAG into a
3461 /// ARM-specific DAG, ready for instruction scheduling.
3463 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
3464 CodeGenOpt::Level OptLevel) {
3465 return new ARMDAGToDAGISel(TM, OptLevel);