lib/Target/ARM/ARMISelDAGToDAG.cpp

   1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines an instruction selector for the ARM target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #define DEBUG_TYPE "arm-isel"
  15 #include "ARM.h"
  16 #include "ARMBaseInstrInfo.h"
  17 #include "ARMTargetMachine.h"
  18 #include "MCTargetDesc/ARMAddressingModes.h"
  19 #include "llvm/CallingConv.h"
  20 #include "llvm/Constants.h"
  21 #include "llvm/DerivedTypes.h"
  22 #include "llvm/Function.h"
  23 #include "llvm/Intrinsics.h"
  24 #include "llvm/LLVMContext.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineFunction.h"
  27 #include "llvm/CodeGen/MachineInstrBuilder.h"
  28 #include "llvm/CodeGen/SelectionDAG.h"
  29 #include "llvm/CodeGen/SelectionDAGISel.h"
  30 #include "llvm/Target/TargetLowering.h"
  31 #include "llvm/Target/TargetOptions.h"
  32 #include "llvm/Support/CommandLine.h"
  33 #include "llvm/Support/Compiler.h"
  34 #include "llvm/Support/Debug.h"
  35 #include "llvm/Support/ErrorHandling.h"
  36 #include "llvm/Support/raw_ostream.h"
  37
  38 using namespace llvm;
  39
  40 static cl::opt<bool>
  41 DisableShifterOp("disable-shifter-op", cl::Hidden,
  42   cl::desc("Disable isel of shifter-op"),
  43   cl::init(false));
  44
  45 static cl::opt<bool>
  46 CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
  47   cl::desc("Check fp vmla / vmls hazard at isel time"),
  48   cl::init(true));
  49
  50 //===--------------------------------------------------------------------===//
  51 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
  52 /// instructions for SelectionDAG operations.
  53 ///
  54 namespace {
  55
  56 enum AddrMode2Type {
  57   AM2_BASE, // Simple AM2 (+-imm12)
  58   AM2_SHOP  // Shifter-op AM2
  59 };
  60
  61 class ARMDAGToDAGISel : public SelectionDAGISel {
  62   ARMBaseTargetMachine &TM;
  63   const ARMBaseInstrInfo *TII;
  64
  65   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  66   /// make the right decision when generating code for different targets.
  67   const ARMSubtarget *Subtarget;
  68
  69 public:
  70   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
  71                            CodeGenOpt::Level OptLevel)
  72     : SelectionDAGISel(tm, OptLevel), TM(tm),
  73       TII(static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo())),
  74       Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
  75   }
  76
  77   virtual const char *getPassName() const {
  78     return "ARM Instruction Selection";
  79   }
  80
  81   /// getI32Imm - Return a target constant of type i32 with the specified
  82   /// value.
  83   inline SDValue getI32Imm(unsigned Imm) {
  84     return CurDAG->getTargetConstant(Imm, MVT::i32);
  85   }
  86
  87   SDNode *Select(SDNode *N);
  88
  89
  90   bool hasNoVMLxHazardUse(SDNode *N) const;
  91   bool isShifterOpProfitable(const SDValue &Shift,
  92                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  93   bool SelectRegShifterOperand(SDValue N, SDValue &A,
  94                                SDValue &B, SDValue &C,
  95                                bool CheckProfitability = true);
  96   bool SelectImmShifterOperand(SDValue N, SDValue &A,
  97                                SDValue &B, bool CheckProfitability = true);
  98   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
  99                                     SDValue &B, SDValue &C) {
 100     // Don't apply the profitability check
 101     return SelectRegShifterOperand(N, A, B, C, false);
 102   }
 103   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
 104                                     SDValue &B) {
 105     // Don't apply the profitability check
 106     return SelectImmShifterOperand(N, A, B, false);
 107   }
 108
 109   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 110   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 111
 112   AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
 113                                       SDValue &Offset, SDValue &Opc);
 114   bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
 115                            SDValue &Opc) {
 116     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
 117   }
 118
 119   bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
 120                            SDValue &Opc) {
 121     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
 122   }
 123
 124   bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
 125                        SDValue &Opc) {
 126     SelectAddrMode2Worker(N, Base, Offset, Opc);
 127 //    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
 128     // This always matches one way or another.
 129     return true;
 130   }
 131
 132   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 133                              SDValue &Offset, SDValue &Opc);
 134   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 135                              SDValue &Offset, SDValue &Opc);
 136   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
 137   bool SelectAddrMode3(SDValue N, SDValue &Base,
 138                        SDValue &Offset, SDValue &Opc);
 139   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
 140                              SDValue &Offset, SDValue &Opc);
 141   bool SelectAddrMode5(SDValue N, SDValue &Base,
 142                        SDValue &Offset);
 143   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
 144   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 145
 146   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
 147
 148   // Thumb Addressing Modes:
 149   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
 150   bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
 151                              unsigned Scale);
 152   bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
 153   bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
 154   bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
 155   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
 156                                 SDValue &OffImm);
 157   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
 158                                  SDValue &OffImm);
 159   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
 160                                  SDValue &OffImm);
 161   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
 162                                  SDValue &OffImm);
 163   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 164
 165   // Thumb 2 Addressing Modes:
 166   bool SelectT2ShifterOperandReg(SDValue N,
 167                                  SDValue &BaseReg, SDValue &Opc);
 168   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
 169   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
 170                             SDValue &OffImm);
 171   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
 172                                  SDValue &OffImm);
 173   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
 174                              SDValue &OffReg, SDValue &ShImm);
 175
 176   inline bool is_so_imm(unsigned Imm) const {
 177     return ARM_AM::getSOImmVal(Imm) != -1;
 178   }
 179
 180   inline bool is_so_imm_not(unsigned Imm) const {
 181     return ARM_AM::getSOImmVal(~Imm) != -1;
 182   }
 183
 184   inline bool is_t2_so_imm(unsigned Imm) const {
 185     return ARM_AM::getT2SOImmVal(Imm) != -1;
 186   }
 187
 188   inline bool is_t2_so_imm_not(unsigned Imm) const {
 189     return ARM_AM::getT2SOImmVal(~Imm) != -1;
 190   }
 191
 192   // Include the pieces autogenerated from the target description.
 193 #include "ARMGenDAGISel.inc"
 194
 195 private:
 196   /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
 197   /// ARM.
 198   SDNode *SelectARMIndexedLoad(SDNode *N);
 199   SDNode *SelectT2IndexedLoad(SDNode *N);
 200
 201   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
 202   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 203   /// loads of D registers and even subregs and odd subregs of Q registers.
 204   /// For NumVecs <= 2, QOpcodes1 is not used.
 205   SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
 206                     unsigned *DOpcodes,
 207                     unsigned *QOpcodes0, unsigned *QOpcodes1);
 208
 209   /// SelectVST - Select NEON store intrinsics.  NumVecs should
 210   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
 211   /// stores of D registers and even subregs and odd subregs of Q registers.
 212   /// For NumVecs <= 2, QOpcodes1 is not used.
 213   SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
 214                     unsigned *DOpcodes,
 215                     unsigned *QOpcodes0, unsigned *QOpcodes1);
 216
 217   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
 218   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
 219   /// load/store of D registers and Q registers.
 220   SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
 221                           bool isUpdating, unsigned NumVecs,
 222                           unsigned *DOpcodes, unsigned *QOpcodes);
 223
 224   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
 225   /// should be 2, 3 or 4.  The opcode array specifies the instructions used
 226   /// for loading D registers.  (Q registers are not supported.)
 227   SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
 228                        unsigned *Opcodes);
 229
 230   /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
 231   /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
 232   /// generated to force the table registers to be consecutive.
 233   SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
 234
 235   /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
 236   SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
 237
 238   /// SelectCMOVOp - Select CMOV instructions for ARM.
 239   SDNode *SelectCMOVOp(SDNode *N);
 240   SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
 241                               ARMCC::CondCodes CCVal, SDValue CCR,
 242                               SDValue InFlag);
 243   SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
 244                                ARMCC::CondCodes CCVal, SDValue CCR,
 245                                SDValue InFlag);
 246   SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
 247                               ARMCC::CondCodes CCVal, SDValue CCR,
 248                               SDValue InFlag);
 249   SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
 250                                ARMCC::CondCodes CCVal, SDValue CCR,
 251                                SDValue InFlag);
 252
 253   SDNode *SelectConcatVector(SDNode *N);
 254
 255   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 256   /// inline asm expressions.
 257   virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
 258                                             char ConstraintCode,
 259                                             std::vector<SDValue> &OutOps);
 260
 261   // Form pairs of consecutive S, D, or Q registers.
 262   SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1);
 263   SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
 264   SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
 265
 266   // Form sequences of 4 consecutive S, D, or Q registers.
 267   SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 268   SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 269   SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 270
 271   // Get the alignment operand for a NEON VLD or VST instruction.
 272   SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
 273 };
 274 }
 275
 276 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
 277 /// operand. If so Imm will receive the 32-bit value.
 278 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
 279   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
 280     Imm = cast<ConstantSDNode>(N)->getZExtValue();
 281     return true;
 282   }
 283   return false;
 284 }
 285
 286 // isInt32Immediate - This method tests to see if a constant operand.
 287 // If so Imm will receive the 32 bit value.
 288 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
 289   return isInt32Immediate(N.getNode(), Imm);
 290 }
 291
 292 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 293 // opcode and that it has a immediate integer right operand.
 294 // If so Imm will receive the 32 bit value.
 295 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 296   return N->getOpcode() == Opc &&
 297          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 298 }
 299
 300 /// \brief Check whether a particular node is a constant value representable as
 301 /// (N * Scale) where (N in [\arg RangeMin, \arg RangeMax).
 302 ///
 303 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
 304 static bool isScaledConstantInRange(SDValue Node, unsigned Scale,
 305                                     int RangeMin, int RangeMax,
 306                                     int &ScaledConstant) {
 307   assert(Scale && "Invalid scale!");
 308
 309   // Check that this is a constant.
 310   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
 311   if (!C)
 312     return false;
 313
 314   ScaledConstant = (int) C->getZExtValue();
 315   if ((ScaledConstant % Scale) != 0)
 316     return false;
 317
 318   ScaledConstant /= Scale;
 319   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
 320 }
 321
 322 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
 323 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
 324 /// least on current ARM implementations) which should be avoidded.
 325 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
 326   if (OptLevel == CodeGenOpt::None)
 327     return true;
 328
 329   if (!CheckVMLxHazard)
 330     return true;
 331
 332   if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9())
 333     return true;
 334
 335   if (!N->hasOneUse())
 336     return false;
 337
 338   SDNode *Use = *N->use_begin();
 339   if (Use->getOpcode() == ISD::CopyToReg)
 340     return true;
 341   if (Use->isMachineOpcode()) {
 342     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
 343     if (MCID.mayStore())
 344       return true;
 345     unsigned Opcode = MCID.getOpcode();
 346     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
 347       return true;
 348     // vmlx feeding into another vmlx. We actually want to unfold
 349     // the use later in the MLxExpansion pass. e.g.
 350     // vmla
 351     // vmla (stall 8 cycles)
 352     //
 353     // vmul (5 cycles)
 354     // vadd (5 cycles)
 355     // vmla
 356     // This adds up to about 18 - 19 cycles.
 357     //
 358     // vmla
 359     // vmul (stall 4 cycles)
 360     // vadd adds up to about 14 cycles.
 361     return TII->isFpMLxInstruction(Opcode);
 362   }
 363
 364   return false;
 365 }
 366
 367 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
 368                                             ARM_AM::ShiftOpc ShOpcVal,
 369                                             unsigned ShAmt) {
 370   if (!Subtarget->isCortexA9())
 371     return true;
 372   if (Shift.hasOneUse())
 373     return true;
 374   // R << 2 is free.
 375   return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
 376 }
 377
 378 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
 379                                               SDValue &BaseReg,
 380                                               SDValue &Opc,
 381                                               bool CheckProfitability) {
 382   if (DisableShifterOp)
 383     return false;
 384
 385   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 386
 387   // Don't match base register only case. That is matched to a separate
 388   // lower complexity pattern with explicit register operand.
 389   if (ShOpcVal == ARM_AM::no_shift) return false;
 390
 391   BaseReg = N.getOperand(0);
 392   unsigned ShImmVal = 0;
 393   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 394   if (!RHS) return false;
 395   ShImmVal = RHS->getZExtValue() & 31;
 396   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 397                                   MVT::i32);
 398   return true;
 399 }
 400
 401 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
 402                                               SDValue &BaseReg,
 403                                               SDValue &ShReg,
 404                                               SDValue &Opc,
 405                                               bool CheckProfitability) {
 406   if (DisableShifterOp)
 407     return false;
 408
 409   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 410
 411   // Don't match base register only case. That is matched to a separate
 412   // lower complexity pattern with explicit register operand.
 413   if (ShOpcVal == ARM_AM::no_shift) return false;
 414
 415   BaseReg = N.getOperand(0);
 416   unsigned ShImmVal = 0;
 417   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
 418   if (RHS) return false;
 419
 420   ShReg = N.getOperand(1);
 421   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
 422     return false;
 423   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
 424                                   MVT::i32);
 425   return true;
 426 }
 427
 428
 429 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 430                                           SDValue &Base,
 431                                           SDValue &OffImm) {
 432   // Match simple R + imm12 operands.
 433
 434   // Base only.
 435   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 436       !CurDAG->isBaseWithConstantOffset(N)) {
 437     if (N.getOpcode() == ISD::FrameIndex) {
 438       // Match frame index.
 439       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 440       Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
 441       OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
 442       return true;
 443     }
 444
 445     if (N.getOpcode() == ARMISD::Wrapper &&
 446         !(Subtarget->useMovt() &&
 447                      N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
 448       Base = N.getOperand(0);
 449     } else
 450       Base = N;
 451     OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
 452     return true;
 453   }
 454
 455   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 456     int RHSC = (int)RHS->getZExtValue();
 457     if (N.getOpcode() == ISD::SUB)
 458       RHSC = -RHSC;
 459
 460     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
 461       Base   = N.getOperand(0);
 462       if (Base.getOpcode() == ISD::FrameIndex) {
 463         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 464         Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
 465       }
 466       OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
 467       return true;
 468     }
 469   }
 470
 471   // Base only.
 472   Base = N;
 473   OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
 474   return true;
 475 }
 476
 477
 478
 479 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
 480                                       SDValue &Opc) {
 481   if (N.getOpcode() == ISD::MUL &&
 482       (!Subtarget->isCortexA9() || N.hasOneUse())) {
 483     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 484       // X * [3,5,9] -> X + X * [2,4,8] etc.
 485       int RHSC = (int)RHS->getZExtValue();
 486       if (RHSC & 1) {
 487         RHSC = RHSC & ~1;
 488         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 489         if (RHSC < 0) {
 490           AddSub = ARM_AM::sub;
 491           RHSC = - RHSC;
 492         }
 493         if (isPowerOf2_32(RHSC)) {
 494           unsigned ShAmt = Log2_32(RHSC);
 495           Base = Offset = N.getOperand(0);
 496           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 497                                                             ARM_AM::lsl),
 498                                           MVT::i32);
 499           return true;
 500         }
 501       }
 502     }
 503   }
 504
 505   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 506       // ISD::OR that is equivalent to an ISD::ADD.
 507       !CurDAG->isBaseWithConstantOffset(N))
 508     return false;
 509
 510   // Leave simple R +/- imm12 operands for LDRi12
 511   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
 512     int RHSC;
 513     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 514                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
 515       return false;
 516   }
 517
 518   if (Subtarget->isCortexA9() && !N.hasOneUse())
 519     // Compute R +/- (R << N) and reuse it.
 520     return false;
 521
 522   // Otherwise this is R +/- [possibly shifted] R.
 523   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
 524   ARM_AM::ShiftOpc ShOpcVal =
 525     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 526   unsigned ShAmt = 0;
 527
 528   Base   = N.getOperand(0);
 529   Offset = N.getOperand(1);
 530
 531   if (ShOpcVal != ARM_AM::no_shift) {
 532     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 533     // it.
 534     if (ConstantSDNode *Sh =
 535            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 536       ShAmt = Sh->getZExtValue();
 537       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 538         Offset = N.getOperand(1).getOperand(0);
 539       else {
 540         ShAmt = 0;
 541         ShOpcVal = ARM_AM::no_shift;
 542       }
 543     } else {
 544       ShOpcVal = ARM_AM::no_shift;
 545     }
 546   }
 547
 548   // Try matching (R shl C) + (R).
 549   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 550       !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
 551     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 552     if (ShOpcVal != ARM_AM::no_shift) {
 553       // Check to see if the RHS of the shift is a constant, if not, we can't
 554       // fold it.
 555       if (ConstantSDNode *Sh =
 556           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 557         ShAmt = Sh->getZExtValue();
 558         if (!Subtarget->isCortexA9() ||
 559             (N.hasOneUse() &&
 560              isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
 561           Offset = N.getOperand(0).getOperand(0);
 562           Base = N.getOperand(1);
 563         } else {
 564           ShAmt = 0;
 565           ShOpcVal = ARM_AM::no_shift;
 566         }
 567       } else {
 568         ShOpcVal = ARM_AM::no_shift;
 569       }
 570     }
 571   }
 572
 573   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 574                                   MVT::i32);
 575   return true;
 576 }
 577
 578
 579
 580
 581 //-----
 582
 583 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
 584                                                      SDValue &Base,
 585                                                      SDValue &Offset,
 586                                                      SDValue &Opc) {
 587   if (N.getOpcode() == ISD::MUL &&
 588       (!Subtarget->isCortexA9() || N.hasOneUse())) {
 589     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 590       // X * [3,5,9] -> X + X * [2,4,8] etc.
 591       int RHSC = (int)RHS->getZExtValue();
 592       if (RHSC & 1) {
 593         RHSC = RHSC & ~1;
 594         ARM_AM::AddrOpc AddSub = ARM_AM::add;
 595         if (RHSC < 0) {
 596           AddSub = ARM_AM::sub;
 597           RHSC = - RHSC;
 598         }
 599         if (isPowerOf2_32(RHSC)) {
 600           unsigned ShAmt = Log2_32(RHSC);
 601           Base = Offset = N.getOperand(0);
 602           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
 603                                                             ARM_AM::lsl),
 604                                           MVT::i32);
 605           return AM2_SHOP;
 606         }
 607       }
 608     }
 609   }
 610
 611   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
 612       // ISD::OR that is equivalent to an ADD.
 613       !CurDAG->isBaseWithConstantOffset(N)) {
 614     Base = N;
 615     if (N.getOpcode() == ISD::FrameIndex) {
 616       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 617       Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
 618     } else if (N.getOpcode() == ARMISD::Wrapper &&
 619                !(Subtarget->useMovt() &&
 620                  N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
 621       Base = N.getOperand(0);
 622     }
 623     Offset = CurDAG->getRegister(0, MVT::i32);
 624     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 625                                                       ARM_AM::no_shift),
 626                                     MVT::i32);
 627     return AM2_BASE;
 628   }
 629
 630   // Match simple R +/- imm12 operands.
 631   if (N.getOpcode() != ISD::SUB) {
 632     int RHSC;
 633     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 634                                 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
 635       Base = N.getOperand(0);
 636       if (Base.getOpcode() == ISD::FrameIndex) {
 637         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 638         Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
 639       }
 640       Offset = CurDAG->getRegister(0, MVT::i32);
 641
 642       ARM_AM::AddrOpc AddSub = ARM_AM::add;
 643       if (RHSC < 0) {
 644         AddSub = ARM_AM::sub;
 645         RHSC = - RHSC;
 646       }
 647       Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
 648                                                         ARM_AM::no_shift),
 649                                       MVT::i32);
 650       return AM2_BASE;
 651     }
 652   }
 653
 654   if (Subtarget->isCortexA9() && !N.hasOneUse()) {
 655     // Compute R +/- (R << N) and reuse it.
 656     Base = N;
 657     Offset = CurDAG->getRegister(0, MVT::i32);
 658     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
 659                                                       ARM_AM::no_shift),
 660                                     MVT::i32);
 661     return AM2_BASE;
 662   }
 663
 664   // Otherwise this is R +/- [possibly shifted] R.
 665   ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
 666   ARM_AM::ShiftOpc ShOpcVal =
 667     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
 668   unsigned ShAmt = 0;
 669
 670   Base   = N.getOperand(0);
 671   Offset = N.getOperand(1);
 672
 673   if (ShOpcVal != ARM_AM::no_shift) {
 674     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 675     // it.
 676     if (ConstantSDNode *Sh =
 677            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
 678       ShAmt = Sh->getZExtValue();
 679       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
 680         Offset = N.getOperand(1).getOperand(0);
 681       else {
 682         ShAmt = 0;
 683         ShOpcVal = ARM_AM::no_shift;
 684       }
 685     } else {
 686       ShOpcVal = ARM_AM::no_shift;
 687     }
 688   }
 689
 690   // Try matching (R shl C) + (R).
 691   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
 692       !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
 693     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
 694     if (ShOpcVal != ARM_AM::no_shift) {
 695       // Check to see if the RHS of the shift is a constant, if not, we can't
 696       // fold it.
 697       if (ConstantSDNode *Sh =
 698           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
 699         ShAmt = Sh->getZExtValue();
 700         if (!Subtarget->isCortexA9() ||
 701             (N.hasOneUse() &&
 702              isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
 703           Offset = N.getOperand(0).getOperand(0);
 704           Base = N.getOperand(1);
 705         } else {
 706           ShAmt = 0;
 707           ShOpcVal = ARM_AM::no_shift;
 708         }
 709       } else {
 710         ShOpcVal = ARM_AM::no_shift;
 711       }
 712     }
 713   }
 714
 715   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 716                                   MVT::i32);
 717   return AM2_SHOP;
 718 }
 719
 720 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
 721                                             SDValue &Offset, SDValue &Opc) {
 722   unsigned Opcode = Op->getOpcode();
 723   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 724     ? cast<LoadSDNode>(Op)->getAddressingMode()
 725     : cast<StoreSDNode>(Op)->getAddressingMode();
 726   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 727     ? ARM_AM::add : ARM_AM::sub;
 728   int Val;
 729   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
 730     return false;
 731
 732   Offset = N;
 733   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 734   unsigned ShAmt = 0;
 735   if (ShOpcVal != ARM_AM::no_shift) {
 736     // Check to see if the RHS of the shift is a constant, if not, we can't fold
 737     // it.
 738     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
 739       ShAmt = Sh->getZExtValue();
 740       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
 741         Offset = N.getOperand(0);
 742       else {
 743         ShAmt = 0;
 744         ShOpcVal = ARM_AM::no_shift;
 745       }
 746     } else {
 747       ShOpcVal = ARM_AM::no_shift;
 748     }
 749   }
 750
 751   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
 752                                   MVT::i32);
 753   return true;
 754 }
 755
 756 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
 757                                             SDValue &Offset, SDValue &Opc) {
 758   unsigned Opcode = Op->getOpcode();
 759   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 760     ? cast<LoadSDNode>(Op)->getAddressingMode()
 761     : cast<StoreSDNode>(Op)->getAddressingMode();
 762   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 763     ? ARM_AM::add : ARM_AM::sub;
 764   int Val;
 765   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
 766     Offset = CurDAG->getRegister(0, MVT::i32);
 767     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
 768                                                       ARM_AM::no_shift),
 769                                     MVT::i32);
 770     return true;
 771   }
 772
 773   return false;
 774 }
 775
 776 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
 777   Base = N;
 778   return true;
 779 }
 780
 781 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
 782                                       SDValue &Base, SDValue &Offset,
 783                                       SDValue &Opc) {
 784   if (N.getOpcode() == ISD::SUB) {
 785     // X - C  is canonicalize to X + -C, no need to handle it here.
 786     Base = N.getOperand(0);
 787     Offset = N.getOperand(1);
 788     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
 789     return true;
 790   }
 791
 792   if (!CurDAG->isBaseWithConstantOffset(N)) {
 793     Base = N;
 794     if (N.getOpcode() == ISD::FrameIndex) {
 795       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 796       Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
 797     }
 798     Offset = CurDAG->getRegister(0, MVT::i32);
 799     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
 800     return true;
 801   }
 802
 803   // If the RHS is +/- imm8, fold into addr mode.
 804   int RHSC;
 805   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
 806                               -256 + 1, 256, RHSC)) { // 8 bits.
 807     Base = N.getOperand(0);
 808     if (Base.getOpcode() == ISD::FrameIndex) {
 809       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 810       Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
 811     }
 812     Offset = CurDAG->getRegister(0, MVT::i32);
 813
 814     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 815     if (RHSC < 0) {
 816       AddSub = ARM_AM::sub;
 817       RHSC = -RHSC;
 818     }
 819     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
 820     return true;
 821   }
 822
 823   Base = N.getOperand(0);
 824   Offset = N.getOperand(1);
 825   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
 826   return true;
 827 }
 828
 829 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
 830                                             SDValue &Offset, SDValue &Opc) {
 831   unsigned Opcode = Op->getOpcode();
 832   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
 833     ? cast<LoadSDNode>(Op)->getAddressingMode()
 834     : cast<StoreSDNode>(Op)->getAddressingMode();
 835   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
 836     ? ARM_AM::add : ARM_AM::sub;
 837   int Val;
 838   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
 839     Offset = CurDAG->getRegister(0, MVT::i32);
 840     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
 841     return true;
 842   }
 843
 844   Offset = N;
 845   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
 846   return true;
 847 }
 848
 849 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
 850                                       SDValue &Base, SDValue &Offset) {
 851   if (!CurDAG->isBaseWithConstantOffset(N)) {
 852     Base = N;
 853     if (N.getOpcode() == ISD::FrameIndex) {
 854       int FI = cast<FrameIndexSDNode>(N)->getIndex();
 855       Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
 856     } else if (N.getOpcode() == ARMISD::Wrapper &&
 857                !(Subtarget->useMovt() &&
 858                  N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
 859       Base = N.getOperand(0);
 860     }
 861     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 862                                        MVT::i32);
 863     return true;
 864   }
 865
 866   // If the RHS is +/- imm8, fold into addr mode.
 867   int RHSC;
 868   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
 869                               -256 + 1, 256, RHSC)) {
 870     Base = N.getOperand(0);
 871     if (Base.getOpcode() == ISD::FrameIndex) {
 872       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
 873       Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
 874     }
 875
 876     ARM_AM::AddrOpc AddSub = ARM_AM::add;
 877     if (RHSC < 0) {
 878       AddSub = ARM_AM::sub;
 879       RHSC = -RHSC;
 880     }
 881     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
 882                                        MVT::i32);
 883     return true;
 884   }
 885
 886   Base = N;
 887   Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
 888                                      MVT::i32);
 889   return true;
 890 }
 891
 892 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
 893                                       SDValue &Align) {
 894   Addr = N;
 895
 896   unsigned Alignment = 0;
 897   if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
 898     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
 899     // The maximum alignment is equal to the memory size being referenced.
 900     unsigned LSNAlign = LSN->getAlignment();
 901     unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
 902     if (LSNAlign > MemSize && MemSize > 1)
 903       Alignment = MemSize;
 904   } else {
 905     // All other uses of addrmode6 are for intrinsics.  For now just record
 906     // the raw alignment value; it will be refined later based on the legal
 907     // alignment operands for the intrinsic.
 908     Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
 909   }
 910
 911   Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
 912   return true;
 913 }
 914
 915 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
 916                                             SDValue &Offset) {
 917   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
 918   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
 919   if (AM != ISD::POST_INC)
 920     return false;
 921   Offset = N;
 922   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
 923     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
 924       Offset = CurDAG->getRegister(0, MVT::i32);
 925   }
 926   return true;
 927 }
 928
 929 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
 930                                        SDValue &Offset, SDValue &Label) {
 931   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
 932     Offset = N.getOperand(0);
 933     SDValue N1 = N.getOperand(1);
 934     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
 935                                       MVT::i32);
 936     return true;
 937   }
 938
 939   return false;
 940 }
 941
 942
 943 //===----------------------------------------------------------------------===//
 944 //                         Thumb Addressing Modes
 945 //===----------------------------------------------------------------------===//
 946
 947 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
 948                                             SDValue &Base, SDValue &Offset){
 949   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
 950     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
 951     if (!NC || !NC->isNullValue())
 952       return false;
 953
 954     Base = Offset = N;
 955     return true;
 956   }
 957
 958   Base = N.getOperand(0);
 959   Offset = N.getOperand(1);
 960   return true;
 961 }
 962
 963 bool
 964 ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
 965                                        SDValue &Offset, unsigned Scale) {
 966   if (Scale == 4) {
 967     SDValue TmpBase, TmpOffImm;
 968     if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
 969       return false;  // We want to select tLDRspi / tSTRspi instead.
 970
 971     if (N.getOpcode() == ARMISD::Wrapper &&
 972         N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
 973       return false;  // We want to select tLDRpci instead.
 974   }
 975
 976   if (!CurDAG->isBaseWithConstantOffset(N))
 977     return false;
 978
 979   // Thumb does not have [sp, r] address mode.
 980   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
 981   RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
 982   if ((LHSR && LHSR->getReg() == ARM::SP) ||
 983       (RHSR && RHSR->getReg() == ARM::SP))
 984     return false;
 985
 986   // FIXME: Why do we explicitly check for a match here and then return false?
 987   // Presumably to allow something else to match, but shouldn't this be
 988   // documented?
 989   int RHSC;
 990   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
 991     return false;
 992
 993   Base = N.getOperand(0);
 994   Offset = N.getOperand(1);
 995   return true;
 996 }
 997
 998 bool
 999 ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
1000                                           SDValue &Base,
1001                                           SDValue &Offset) {
1002   return SelectThumbAddrModeRI(N, Base, Offset, 1);
1003 }
1004
1005 bool
1006 ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
1007                                           SDValue &Base,
1008                                           SDValue &Offset) {
1009   return SelectThumbAddrModeRI(N, Base, Offset, 2);
1010 }
1011
1012 bool
1013 ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
1014                                           SDValue &Base,
1015                                           SDValue &Offset) {
1016   return SelectThumbAddrModeRI(N, Base, Offset, 4);
1017 }
1018
1019 bool
1020 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1021                                           SDValue &Base, SDValue &OffImm) {
1022   if (Scale == 4) {
1023     SDValue TmpBase, TmpOffImm;
1024     if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
1025       return false;  // We want to select tLDRspi / tSTRspi instead.
1026
1027     if (N.getOpcode() == ARMISD::Wrapper &&
1028         N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
1029       return false;  // We want to select tLDRpci instead.
1030   }
1031
1032   if (!CurDAG->isBaseWithConstantOffset(N)) {
1033     if (N.getOpcode() == ARMISD::Wrapper &&
1034         !(Subtarget->useMovt() &&
1035           N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
1036       Base = N.getOperand(0);
1037     } else {
1038       Base = N;
1039     }
1040
1041     OffImm = CurDAG->getTargetConstant(0, MVT::i32);
1042     return true;
1043   }
1044
1045   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1046   RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
1047   if ((LHSR && LHSR->getReg() == ARM::SP) ||
1048       (RHSR && RHSR->getReg() == ARM::SP)) {
1049     ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
1050     ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1051     unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
1052     unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
1053
1054     // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
1055     if (LHSC != 0 || RHSC != 0) return false;
1056
1057     Base = N;
1058     OffImm = CurDAG->getTargetConstant(0, MVT::i32);
1059     return true;
1060   }
1061
1062   // If the RHS is + imm5 * scale, fold into addr mode.
1063   int RHSC;
1064   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1065     Base = N.getOperand(0);
1066     OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
1067     return true;
1068   }
1069
1070   Base = N.getOperand(0);
1071   OffImm = CurDAG->getTargetConstant(0, MVT::i32);
1072   return true;
1073 }
1074
1075 bool
1076 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1077                                            SDValue &OffImm) {
1078   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1079 }
1080
1081 bool
1082 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1083                                            SDValue &OffImm) {
1084   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1085 }
1086
1087 bool
1088 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1089                                            SDValue &OffImm) {
1090   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1091 }
1092
1093 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1094                                             SDValue &Base, SDValue &OffImm) {
1095   if (N.getOpcode() == ISD::FrameIndex) {
1096     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1097     Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
1098     OffImm = CurDAG->getTargetConstant(0, MVT::i32);
1099     return true;
1100   }
1101
1102   if (!CurDAG->isBaseWithConstantOffset(N))
1103     return false;
1104
1105   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1106   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1107       (LHSR && LHSR->getReg() == ARM::SP)) {
1108     // If the RHS is + imm8 * scale, fold into addr mode.
1109     int RHSC;
1110     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1111       Base = N.getOperand(0);
1112       if (Base.getOpcode() == ISD::FrameIndex) {
1113         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1114         Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
1115       }
1116       OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
1117       return true;
1118     }
1119   }
1120
1121   return false;
1122 }
1123
1124
1125 //===----------------------------------------------------------------------===//
1126 //                        Thumb 2 Addressing Modes
1127 //===----------------------------------------------------------------------===//
1128
1129
1130 bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
1131                                                 SDValue &Opc) {
1132   if (DisableShifterOp)
1133     return false;
1134
1135   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
1136
1137   // Don't match base register only case. That is matched to a separate
1138   // lower complexity pattern with explicit register operand.
1139   if (ShOpcVal == ARM_AM::no_shift) return false;
1140
1141   BaseReg = N.getOperand(0);
1142   unsigned ShImmVal = 0;
1143   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1144     ShImmVal = RHS->getZExtValue() & 31;
1145     Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
1146     return true;
1147   }
1148
1149   return false;
1150 }
1151
1152 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1153                                             SDValue &Base, SDValue &OffImm) {
1154   // Match simple R + imm12 operands.
1155
1156   // Base only.
1157   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1158       !CurDAG->isBaseWithConstantOffset(N)) {
1159     if (N.getOpcode() == ISD::FrameIndex) {
1160       // Match frame index.
1161       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1162       Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
1163       OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
1164       return true;
1165     }
1166
1167     if (N.getOpcode() == ARMISD::Wrapper &&
1168                !(Subtarget->useMovt() &&
1169                  N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
1170       Base = N.getOperand(0);
1171       if (Base.getOpcode() == ISD::TargetConstantPool)
1172         return false;  // We want to select t2LDRpci instead.
1173     } else
1174       Base = N;
1175     OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
1176     return true;
1177   }
1178
1179   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1180     if (SelectT2AddrModeImm8(N, Base, OffImm))
1181       // Let t2LDRi8 handle (R - imm8).
1182       return false;
1183
1184     int RHSC = (int)RHS->getZExtValue();
1185     if (N.getOpcode() == ISD::SUB)
1186       RHSC = -RHSC;
1187
1188     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1189       Base   = N.getOperand(0);
1190       if (Base.getOpcode() == ISD::FrameIndex) {
1191         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1192         Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
1193       }
1194       OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
1195       return true;
1196     }
1197   }
1198
1199   // Base only.
1200   Base = N;
1201   OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
1202   return true;
1203 }
1204
1205 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1206                                            SDValue &Base, SDValue &OffImm) {
1207   // Match simple R - imm8 operands.
1208   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1209       !CurDAG->isBaseWithConstantOffset(N))
1210     return false;
1211
1212   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1213     int RHSC = (int)RHS->getSExtValue();
1214     if (N.getOpcode() == ISD::SUB)
1215       RHSC = -RHSC;
1216
1217     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1218       Base = N.getOperand(0);
1219       if (Base.getOpcode() == ISD::FrameIndex) {
1220         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1221         Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
1222       }
1223       OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
1224       return true;
1225     }
1226   }
1227
1228   return false;
1229 }
1230
1231 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1232                                                  SDValue &OffImm){
1233   unsigned Opcode = Op->getOpcode();
1234   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1235     ? cast<LoadSDNode>(Op)->getAddressingMode()
1236     : cast<StoreSDNode>(Op)->getAddressingMode();
1237   int RHSC;
1238   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1239     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1240       ? CurDAG->getTargetConstant(RHSC, MVT::i32)
1241       : CurDAG->getTargetConstant(-RHSC, MVT::i32);
1242     return true;
1243   }
1244
1245   return false;
1246 }
1247
1248 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1249                                             SDValue &Base,
1250                                             SDValue &OffReg, SDValue &ShImm) {
1251   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1252   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1253     return false;
1254
1255   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1256   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1257     int RHSC = (int)RHS->getZExtValue();
1258     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1259       return false;
1260     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1261       return false;
1262   }
1263
1264   if (Subtarget->isCortexA9() && !N.hasOneUse()) {
1265     // Compute R + (R << [1,2,3]) and reuse it.
1266     Base = N;
1267     return false;
1268   }
1269
1270   // Look for (R + R) or (R + (R << [1,2,3])).
1271   unsigned ShAmt = 0;
1272   Base   = N.getOperand(0);
1273   OffReg = N.getOperand(1);
1274
1275   // Swap if it is ((R << c) + R).
1276   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1277   if (ShOpcVal != ARM_AM::lsl) {
1278     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1279     if (ShOpcVal == ARM_AM::lsl)
1280       std::swap(Base, OffReg);
1281   }
1282
1283   if (ShOpcVal == ARM_AM::lsl) {
1284     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1285     // it.
1286     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1287       ShAmt = Sh->getZExtValue();
1288       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1289         OffReg = OffReg.getOperand(0);
1290       else {
1291         ShAmt = 0;
1292         ShOpcVal = ARM_AM::no_shift;
1293       }
1294     } else {
1295       ShOpcVal = ARM_AM::no_shift;
1296     }
1297   }
1298
1299   ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32);
1300
1301   return true;
1302 }
1303
1304 //===--------------------------------------------------------------------===//
1305
1306 /// getAL - Returns a ARMCC::AL immediate node.
1307 static inline SDValue getAL(SelectionDAG *CurDAG) {
1308   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
1309 }
1310
1311 SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
1312   LoadSDNode *LD = cast<LoadSDNode>(N);
1313   ISD::MemIndexedMode AM = LD->getAddressingMode();
1314   if (AM == ISD::UNINDEXED)
1315     return NULL;
1316
1317   EVT LoadedVT = LD->getMemoryVT();
1318   SDValue Offset, AMOpc;
1319   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1320   unsigned Opcode = 0;
1321   bool Match = false;
1322   if (LoadedVT == MVT::i32 &&
1323       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1324     Opcode = isPre ? ARM::LDR_PRE_IMM : ARM::LDR_POST_IMM;
1325     Match = true;
1326   } else if (LoadedVT == MVT::i32 &&
1327       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1328     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1329     Match = true;
1330
1331   } else if (LoadedVT == MVT::i16 &&
1332              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1333     Match = true;
1334     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1335       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1336       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1337   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1338     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1339       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1340         Match = true;
1341         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1342       }
1343     } else {
1344       if (SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1345         Match = true;
1346         Opcode = isPre ? ARM::LDRB_PRE_IMM : ARM::LDRB_POST_IMM;
1347       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1348         Match = true;
1349         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1350       }
1351     }
1352   }
1353
1354   if (Match) {
1355     SDValue Chain = LD->getChain();
1356     SDValue Base = LD->getBasePtr();
1357     SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
1358                      CurDAG->getRegister(0, MVT::i32), Chain };
1359     return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
1360                                   MVT::Other, Ops, 6);
1361   }
1362
1363   return NULL;
1364 }
1365
1366 SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
1367   LoadSDNode *LD = cast<LoadSDNode>(N);
1368   ISD::MemIndexedMode AM = LD->getAddressingMode();
1369   if (AM == ISD::UNINDEXED)
1370     return NULL;
1371
1372   EVT LoadedVT = LD->getMemoryVT();
1373   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1374   SDValue Offset;
1375   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1376   unsigned Opcode = 0;
1377   bool Match = false;
1378   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1379     switch (LoadedVT.getSimpleVT().SimpleTy) {
1380     case MVT::i32:
1381       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1382       break;
1383     case MVT::i16:
1384       if (isSExtLd)
1385         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1386       else
1387         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1388       break;
1389     case MVT::i8:
1390     case MVT::i1:
1391       if (isSExtLd)
1392         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1393       else
1394         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1395       break;
1396     default:
1397       return NULL;
1398     }
1399     Match = true;
1400   }
1401
1402   if (Match) {
1403     SDValue Chain = LD->getChain();
1404     SDValue Base = LD->getBasePtr();
1405     SDValue Ops[]= { Base, Offset, getAL(CurDAG),
1406                      CurDAG->getRegister(0, MVT::i32), Chain };
1407     return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
1408                                   MVT::Other, Ops, 5);
1409   }
1410
1411   return NULL;
1412 }
1413
1414 /// PairSRegs - Form a D register from a pair of S registers.
1415 ///
1416 SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
1417   DebugLoc dl = V0.getNode()->getDebugLoc();
1418   SDValue RegClass =
1419     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
1420   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
1421   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
1422   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1423   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
1424 }
1425
1426 /// PairDRegs - Form a quad register from a pair of D registers.
1427 ///
1428 SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
1429   DebugLoc dl = V0.getNode()->getDebugLoc();
1430   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
1431   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
1432   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
1433   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1434   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
1435 }
1436
1437 /// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
1438 ///
1439 SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
1440   DebugLoc dl = V0.getNode()->getDebugLoc();
1441   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
1442   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
1443   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
1444   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1445   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
1446 }
1447
1448 /// QuadSRegs - Form 4 consecutive S registers.
1449 ///
1450 SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
1451                                    SDValue V2, SDValue V3) {
1452   DebugLoc dl = V0.getNode()->getDebugLoc();
1453   SDValue RegClass =
1454     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32);
1455   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
1456   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
1457   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
1458   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
1459   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1460                                     V2, SubReg2, V3, SubReg3 };
1461   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
1462 }
1463
1464 /// QuadDRegs - Form 4 consecutive D registers.
1465 ///
1466 SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
1467                                    SDValue V2, SDValue V3) {
1468   DebugLoc dl = V0.getNode()->getDebugLoc();
1469   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
1470   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
1471   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
1472   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
1473   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
1474   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1475                                     V2, SubReg2, V3, SubReg3 };
1476   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
1477 }
1478
1479 /// QuadQRegs - Form 4 consecutive Q registers.
1480 ///
1481 SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
1482                                    SDValue V2, SDValue V3) {
1483   DebugLoc dl = V0.getNode()->getDebugLoc();
1484   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
1485   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
1486   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
1487   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
1488   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
1489   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1490                                     V2, SubReg2, V3, SubReg3 };
1491   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
1492 }
1493
1494 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1495 /// of a NEON VLD or VST instruction.  The supported values depend on the
1496 /// number of registers being loaded.
1497 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
1498                                        bool is64BitVector) {
1499   unsigned NumRegs = NumVecs;
1500   if (!is64BitVector && NumVecs < 3)
1501     NumRegs *= 2;
1502
1503   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1504   if (Alignment >= 32 && NumRegs == 4)
1505     Alignment = 32;
1506   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1507     Alignment = 16;
1508   else if (Alignment >= 8)
1509     Alignment = 8;
1510   else
1511     Alignment = 0;
1512
1513   return CurDAG->getTargetConstant(Alignment, MVT::i32);
1514 }
1515
1516 SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1517                                    unsigned *DOpcodes, unsigned *QOpcodes0,
1518                                    unsigned *QOpcodes1) {
1519   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1520   DebugLoc dl = N->getDebugLoc();
1521
1522   SDValue MemAddr, Align;
1523   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1524   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1525     return NULL;
1526
1527   SDValue Chain = N->getOperand(0);
1528   EVT VT = N->getValueType(0);
1529   bool is64BitVector = VT.is64BitVector();
1530   Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
1531
1532   unsigned OpcodeIndex;
1533   switch (VT.getSimpleVT().SimpleTy) {
1534   default: llvm_unreachable("unhandled vld type");
1535     // Double-register operations:
1536   case MVT::v8i8:  OpcodeIndex = 0; break;
1537   case MVT::v4i16: OpcodeIndex = 1; break;
1538   case MVT::v2f32:
1539   case MVT::v2i32: OpcodeIndex = 2; break;
1540   case MVT::v1i64: OpcodeIndex = 3; break;
1541     // Quad-register operations:
1542   case MVT::v16i8: OpcodeIndex = 0; break;
1543   case MVT::v8i16: OpcodeIndex = 1; break;
1544   case MVT::v4f32:
1545   case MVT::v4i32: OpcodeIndex = 2; break;
1546   case MVT::v2i64: OpcodeIndex = 3;
1547     assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1548     break;
1549   }
1550
1551   EVT ResTy;
1552   if (NumVecs == 1)
1553     ResTy = VT;
1554   else {
1555     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1556     if (!is64BitVector)
1557       ResTyElts *= 2;
1558     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1559   }
1560   std::vector<EVT> ResTys;
1561   ResTys.push_back(ResTy);
1562   if (isUpdating)
1563     ResTys.push_back(MVT::i32);
1564   ResTys.push_back(MVT::Other);
1565
1566   SDValue Pred = getAL(CurDAG);
1567   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1568   SDNode *VLd;
1569   SmallVector<SDValue, 7> Ops;
1570
1571   // Double registers and VLD1/VLD2 quad registers are directly supported.
1572   if (is64BitVector || NumVecs <= 2) {
1573     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1574                     QOpcodes0[OpcodeIndex]);
1575     Ops.push_back(MemAddr);
1576     Ops.push_back(Align);
1577     if (isUpdating) {
1578       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1579       Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1580     }
1581     Ops.push_back(Pred);
1582     Ops.push_back(Reg0);
1583     Ops.push_back(Chain);
1584     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
1585
1586   } else {
1587     // Otherwise, quad registers are loaded with two separate instructions,
1588     // where one loads the even registers and the other loads the odd registers.
1589     EVT AddrTy = MemAddr.getValueType();
1590
1591     // Load the even subregs.  This is always an updating load, so that it
1592     // provides the address to the second load for the odd subregs.
1593     SDValue ImplDef =
1594       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1595     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1596     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1597                                           ResTy, AddrTy, MVT::Other, OpsA, 7);
1598     Chain = SDValue(VLdA, 2);
1599
1600     // Load the odd subregs.
1601     Ops.push_back(SDValue(VLdA, 1));
1602     Ops.push_back(Align);
1603     if (isUpdating) {
1604       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1605       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1606              "only constant post-increment update allowed for VLD3/4");
1607       (void)Inc;
1608       Ops.push_back(Reg0);
1609     }
1610     Ops.push_back(SDValue(VLdA, 0));
1611     Ops.push_back(Pred);
1612     Ops.push_back(Reg0);
1613     Ops.push_back(Chain);
1614     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
1615                                  Ops.data(), Ops.size());
1616   }
1617
1618   // Transfer memoperands.
1619   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1620   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1621   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1622
1623   if (NumVecs == 1)
1624     return VLd;
1625
1626   // Extract out the subregisters.
1627   SDValue SuperReg = SDValue(VLd, 0);
1628   assert(ARM::dsub_7 == ARM::dsub_0+7 &&
1629          ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
1630   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1631   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1632     ReplaceUses(SDValue(N, Vec),
1633                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1634   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1635   if (isUpdating)
1636     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1637   return NULL;
1638 }
1639
1640 SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1641                                    unsigned *DOpcodes, unsigned *QOpcodes0,
1642                                    unsigned *QOpcodes1) {
1643   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1644   DebugLoc dl = N->getDebugLoc();
1645
1646   SDValue MemAddr, Align;
1647   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1648   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1649   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1650     return NULL;
1651
1652   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1653   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1654
1655   SDValue Chain = N->getOperand(0);
1656   EVT VT = N->getOperand(Vec0Idx).getValueType();
1657   bool is64BitVector = VT.is64BitVector();
1658   Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
1659
1660   unsigned OpcodeIndex;
1661   switch (VT.getSimpleVT().SimpleTy) {
1662   default: llvm_unreachable("unhandled vst type");
1663     // Double-register operations:
1664   case MVT::v8i8:  OpcodeIndex = 0; break;
1665   case MVT::v4i16: OpcodeIndex = 1; break;
1666   case MVT::v2f32:
1667   case MVT::v2i32: OpcodeIndex = 2; break;
1668   case MVT::v1i64: OpcodeIndex = 3; break;
1669     // Quad-register operations:
1670   case MVT::v16i8: OpcodeIndex = 0; break;
1671   case MVT::v8i16: OpcodeIndex = 1; break;
1672   case MVT::v4f32:
1673   case MVT::v4i32: OpcodeIndex = 2; break;
1674   case MVT::v2i64: OpcodeIndex = 3;
1675     assert(NumVecs == 1 && "v2i64 type only supported for VST1");
1676     break;
1677   }
1678
1679   std::vector<EVT> ResTys;
1680   if (isUpdating)
1681     ResTys.push_back(MVT::i32);
1682   ResTys.push_back(MVT::Other);
1683
1684   SDValue Pred = getAL(CurDAG);
1685   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1686   SmallVector<SDValue, 7> Ops;
1687
1688   // Double registers and VST1/VST2 quad registers are directly supported.
1689   if (is64BitVector || NumVecs <= 2) {
1690     SDValue SrcReg;
1691     if (NumVecs == 1) {
1692       SrcReg = N->getOperand(Vec0Idx);
1693     } else if (is64BitVector) {
1694       // Form a REG_SEQUENCE to force register allocation.
1695       SDValue V0 = N->getOperand(Vec0Idx + 0);
1696       SDValue V1 = N->getOperand(Vec0Idx + 1);
1697       if (NumVecs == 2)
1698         SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
1699       else {
1700         SDValue V2 = N->getOperand(Vec0Idx + 2);
1701         // If it's a vst3, form a quad D-register and leave the last part as
1702         // an undef.
1703         SDValue V3 = (NumVecs == 3)
1704           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
1705           : N->getOperand(Vec0Idx + 3);
1706         SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
1707       }
1708     } else {
1709       // Form a QQ register.
1710       SDValue Q0 = N->getOperand(Vec0Idx);
1711       SDValue Q1 = N->getOperand(Vec0Idx + 1);
1712       SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
1713     }
1714
1715     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1716                     QOpcodes0[OpcodeIndex]);
1717     Ops.push_back(MemAddr);
1718     Ops.push_back(Align);
1719     if (isUpdating) {
1720       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1721       Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1722     }
1723     Ops.push_back(SrcReg);
1724     Ops.push_back(Pred);
1725     Ops.push_back(Reg0);
1726     Ops.push_back(Chain);
1727     SDNode *VSt =
1728       CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
1729
1730     // Transfer memoperands.
1731     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
1732
1733     return VSt;
1734   }
1735
1736   // Otherwise, quad registers are stored with two separate instructions,
1737   // where one stores the even registers and the other stores the odd registers.
1738
1739   // Form the QQQQ REG_SEQUENCE.
1740   SDValue V0 = N->getOperand(Vec0Idx + 0);
1741   SDValue V1 = N->getOperand(Vec0Idx + 1);
1742   SDValue V2 = N->getOperand(Vec0Idx + 2);
1743   SDValue V3 = (NumVecs == 3)
1744     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
1745     : N->getOperand(Vec0Idx + 3);
1746   SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
1747
1748   // Store the even D registers.  This is always an updating store, so that it
1749   // provides the address to the second store for the odd subregs.
1750   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
1751   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1752                                         MemAddr.getValueType(),
1753                                         MVT::Other, OpsA, 7);
1754   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
1755   Chain = SDValue(VStA, 1);
1756
1757   // Store the odd D registers.
1758   Ops.push_back(SDValue(VStA, 0));
1759   Ops.push_back(Align);
1760   if (isUpdating) {
1761     SDValue Inc = N->getOperand(AddrOpIdx + 1);
1762     assert(isa<ConstantSDNode>(Inc.getNode()) &&
1763            "only constant post-increment update allowed for VST3/4");
1764     (void)Inc;
1765     Ops.push_back(Reg0);
1766   }
1767   Ops.push_back(RegSeq);
1768   Ops.push_back(Pred);
1769   Ops.push_back(Reg0);
1770   Ops.push_back(Chain);
1771   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
1772                                         Ops.data(), Ops.size());
1773   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
1774   return VStB;
1775 }
1776
1777 SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
1778                                          bool isUpdating, unsigned NumVecs,
1779                                          unsigned *DOpcodes,
1780                                          unsigned *QOpcodes) {
1781   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
1782   DebugLoc dl = N->getDebugLoc();
1783
1784   SDValue MemAddr, Align;
1785   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1786   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1787   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1788     return NULL;
1789
1790   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1791   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1792
1793   SDValue Chain = N->getOperand(0);
1794   unsigned Lane =
1795     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
1796   EVT VT = N->getOperand(Vec0Idx).getValueType();
1797   bool is64BitVector = VT.is64BitVector();
1798
1799   unsigned Alignment = 0;
1800   if (NumVecs != 3) {
1801     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1802     unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
1803     if (Alignment > NumBytes)
1804       Alignment = NumBytes;
1805     if (Alignment < 8 && Alignment < NumBytes)
1806       Alignment = 0;
1807     // Alignment must be a power of two; make sure of that.
1808     Alignment = (Alignment & -Alignment);
1809     if (Alignment == 1)
1810       Alignment = 0;
1811   }
1812   Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
1813
1814   unsigned OpcodeIndex;
1815   switch (VT.getSimpleVT().SimpleTy) {
1816   default: llvm_unreachable("unhandled vld/vst lane type");
1817     // Double-register operations:
1818   case MVT::v8i8:  OpcodeIndex = 0; break;
1819   case MVT::v4i16: OpcodeIndex = 1; break;
1820   case MVT::v2f32:
1821   case MVT::v2i32: OpcodeIndex = 2; break;
1822     // Quad-register operations:
1823   case MVT::v8i16: OpcodeIndex = 0; break;
1824   case MVT::v4f32:
1825   case MVT::v4i32: OpcodeIndex = 1; break;
1826   }
1827
1828   std::vector<EVT> ResTys;
1829   if (IsLoad) {
1830     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1831     if (!is64BitVector)
1832       ResTyElts *= 2;
1833     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
1834                                       MVT::i64, ResTyElts));
1835   }
1836   if (isUpdating)
1837     ResTys.push_back(MVT::i32);
1838   ResTys.push_back(MVT::Other);
1839
1840   SDValue Pred = getAL(CurDAG);
1841   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1842
1843   SmallVector<SDValue, 8> Ops;
1844   Ops.push_back(MemAddr);
1845   Ops.push_back(Align);
1846   if (isUpdating) {
1847     SDValue Inc = N->getOperand(AddrOpIdx + 1);
1848     Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1849   }
1850
1851   SDValue SuperReg;
1852   SDValue V0 = N->getOperand(Vec0Idx + 0);
1853   SDValue V1 = N->getOperand(Vec0Idx + 1);
1854   if (NumVecs == 2) {
1855     if (is64BitVector)
1856       SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
1857     else
1858       SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
1859   } else {
1860     SDValue V2 = N->getOperand(Vec0Idx + 2);
1861     SDValue V3 = (NumVecs == 3)
1862       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
1863       : N->getOperand(Vec0Idx + 3);
1864     if (is64BitVector)
1865       SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
1866     else
1867       SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
1868   }
1869   Ops.push_back(SuperReg);
1870   Ops.push_back(getI32Imm(Lane));
1871   Ops.push_back(Pred);
1872   Ops.push_back(Reg0);
1873   Ops.push_back(Chain);
1874
1875   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1876                                   QOpcodes[OpcodeIndex]);
1877   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
1878                                          Ops.data(), Ops.size());
1879   cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
1880   if (!IsLoad)
1881     return VLdLn;
1882
1883   // Extract the subregisters.
1884   SuperReg = SDValue(VLdLn, 0);
1885   assert(ARM::dsub_7 == ARM::dsub_0+7 &&
1886          ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
1887   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
1888   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1889     ReplaceUses(SDValue(N, Vec),
1890                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1891   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
1892   if (isUpdating)
1893     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
1894   return NULL;
1895 }
1896
1897 SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
1898                                       unsigned NumVecs, unsigned *Opcodes) {
1899   assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
1900   DebugLoc dl = N->getDebugLoc();
1901
1902   SDValue MemAddr, Align;
1903   if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
1904     return NULL;
1905
1906   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1907   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1908
1909   SDValue Chain = N->getOperand(0);
1910   EVT VT = N->getValueType(0);
1911
1912   unsigned Alignment = 0;
1913   if (NumVecs != 3) {
1914     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1915     unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
1916     if (Alignment > NumBytes)
1917       Alignment = NumBytes;
1918     if (Alignment < 8 && Alignment < NumBytes)
1919       Alignment = 0;
1920     // Alignment must be a power of two; make sure of that.
1921     Alignment = (Alignment & -Alignment);
1922     if (Alignment == 1)
1923       Alignment = 0;
1924   }
1925   Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
1926
1927   unsigned OpcodeIndex;
1928   switch (VT.getSimpleVT().SimpleTy) {
1929   default: llvm_unreachable("unhandled vld-dup type");
1930   case MVT::v8i8:  OpcodeIndex = 0; break;
1931   case MVT::v4i16: OpcodeIndex = 1; break;
1932   case MVT::v2f32:
1933   case MVT::v2i32: OpcodeIndex = 2; break;
1934   }
1935
1936   SDValue Pred = getAL(CurDAG);
1937   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1938   SDValue SuperReg;
1939   unsigned Opc = Opcodes[OpcodeIndex];
1940   SmallVector<SDValue, 6> Ops;
1941   Ops.push_back(MemAddr);
1942   Ops.push_back(Align);
1943   if (isUpdating) {
1944     SDValue Inc = N->getOperand(2);
1945     Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1946   }
1947   Ops.push_back(Pred);
1948   Ops.push_back(Reg0);
1949   Ops.push_back(Chain);
1950
1951   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1952   std::vector<EVT> ResTys;
1953   ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
1954   if (isUpdating)
1955     ResTys.push_back(MVT::i32);
1956   ResTys.push_back(MVT::Other);
1957   SDNode *VLdDup =
1958     CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
1959   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
1960   SuperReg = SDValue(VLdDup, 0);
1961
1962   // Extract the subregisters.
1963   assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
1964   unsigned SubIdx = ARM::dsub_0;
1965   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1966     ReplaceUses(SDValue(N, Vec),
1967                 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
1968   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
1969   if (isUpdating)
1970     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
1971   return NULL;
1972 }
1973
1974 SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
1975                                     unsigned Opc) {
1976   assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
1977   DebugLoc dl = N->getDebugLoc();
1978   EVT VT = N->getValueType(0);
1979   unsigned FirstTblReg = IsExt ? 2 : 1;
1980
1981   // Form a REG_SEQUENCE to force register allocation.
1982   SDValue RegSeq;
1983   SDValue V0 = N->getOperand(FirstTblReg + 0);
1984   SDValue V1 = N->getOperand(FirstTblReg + 1);
1985   if (NumVecs == 2)
1986     RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
1987   else {
1988     SDValue V2 = N->getOperand(FirstTblReg + 2);
1989     // If it's a vtbl3, form a quad D-register and leave the last part as
1990     // an undef.
1991     SDValue V3 = (NumVecs == 3)
1992       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
1993       : N->getOperand(FirstTblReg + 3);
1994     RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
1995   }
1996
1997   SmallVector<SDValue, 6> Ops;
1998   if (IsExt)
1999     Ops.push_back(N->getOperand(1));
2000   Ops.push_back(RegSeq);
2001   Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2002   Ops.push_back(getAL(CurDAG)); // predicate
2003   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2004   return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
2005 }
2006
2007 SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
2008                                                      bool isSigned) {
2009   if (!Subtarget->hasV6T2Ops())
2010     return NULL;
2011
2012   unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2013     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2014
2015
2016   // For unsigned extracts, check for a shift right and mask
2017   unsigned And_imm = 0;
2018   if (N->getOpcode() == ISD::AND) {
2019     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2020
2021       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2022       if (And_imm & (And_imm + 1))
2023         return NULL;
2024
2025       unsigned Srl_imm = 0;
2026       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2027                                 Srl_imm)) {
2028         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2029
2030         // Note: The width operand is encoded as width-1.
2031         unsigned Width = CountTrailingOnes_32(And_imm) - 1;
2032         unsigned LSB = Srl_imm;
2033         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2034         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2035                           CurDAG->getTargetConstant(LSB, MVT::i32),
2036                           CurDAG->getTargetConstant(Width, MVT::i32),
2037           getAL(CurDAG), Reg0 };
2038         return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
2039       }
2040     }
2041     return NULL;
2042   }
2043
2044   // Otherwise, we're looking for a shift of a shift
2045   unsigned Shl_imm = 0;
2046   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2047     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2048     unsigned Srl_imm = 0;
2049     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2050       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2051       // Note: The width operand is encoded as width-1.
2052       unsigned Width = 32 - Srl_imm - 1;
2053       int LSB = Srl_imm - Shl_imm;
2054       if (LSB < 0)
2055         return NULL;
2056       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2057       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2058                         CurDAG->getTargetConstant(LSB, MVT::i32),
2059                         CurDAG->getTargetConstant(Width, MVT::i32),
2060                         getAL(CurDAG), Reg0 };
2061       return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
2062     }
2063   }
2064   return NULL;
2065 }
2066
2067 SDNode *ARMDAGToDAGISel::
2068 SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
2069                     ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
2070   SDValue CPTmp0;
2071   SDValue CPTmp1;
2072   if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
2073     unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
2074     unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
2075     unsigned Opc = 0;
2076     switch (SOShOp) {
2077     case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
2078     case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
2079     case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
2080     case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
2081     default:
2082       llvm_unreachable("Unknown so_reg opcode!");
2083       break;
2084     }
2085     SDValue SOShImm =
2086       CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
2087     SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
2088     SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag };
2089     return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6);
2090   }
2091   return 0;
2092 }
2093
2094 SDNode *ARMDAGToDAGISel::
2095 SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
2096                      ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
2097   SDValue CPTmp0;
2098   SDValue CPTmp1;
2099   SDValue CPTmp2;
2100   if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
2101     SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
2102     SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag };
2103     return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6);
2104   }
2105
2106   if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
2107     SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
2108     SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
2109     return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7);
2110   }
2111   return 0;
2112 }
2113
2114 SDNode *ARMDAGToDAGISel::
2115 SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
2116                   ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
2117   ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
2118   if (!T)
2119     return 0;
2120
2121   unsigned Opc = 0;
2122   unsigned TrueImm = T->getZExtValue();
2123   if (is_t2_so_imm(TrueImm)) {
2124     Opc = ARM::t2MOVCCi;
2125   } else if (TrueImm <= 0xffff) {
2126     Opc = ARM::t2MOVCCi16;
2127   } else if (is_t2_so_imm_not(TrueImm)) {
2128     TrueImm = ~TrueImm;
2129     Opc = ARM::t2MVNCCi;
2130   } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) {
2131     // Large immediate.
2132     Opc = ARM::t2MOVCCi32imm;
2133   }
2134
2135   if (Opc) {
2136     SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
2137     SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
2138     SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
2139     return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
2140   }
2141
2142   return 0;
2143 }
2144
2145 SDNode *ARMDAGToDAGISel::
2146 SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
2147                    ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
2148   ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
2149   if (!T)
2150     return 0;
2151
2152   unsigned Opc = 0;
2153   unsigned TrueImm = T->getZExtValue();
2154   bool isSoImm = is_so_imm(TrueImm);
2155   if (isSoImm) {
2156     Opc = ARM::MOVCCi;
2157   } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) {
2158     Opc = ARM::MOVCCi16;
2159   } else if (is_so_imm_not(TrueImm)) {
2160     TrueImm = ~TrueImm;
2161     Opc = ARM::MVNCCi;
2162   } else if (TrueVal.getNode()->hasOneUse() &&
2163              (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) {
2164     // Large immediate.
2165     Opc = ARM::MOVCCi32imm;
2166   }
2167
2168   if (Opc) {
2169     SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
2170     SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
2171     SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
2172     return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
2173   }
2174
2175   return 0;
2176 }
2177
2178 SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
2179   EVT VT = N->getValueType(0);
2180   SDValue FalseVal = N->getOperand(0);
2181   SDValue TrueVal  = N->getOperand(1);
2182   SDValue CC = N->getOperand(2);
2183   SDValue CCR = N->getOperand(3);
2184   SDValue InFlag = N->getOperand(4);
2185   assert(CC.getOpcode() == ISD::Constant);
2186   assert(CCR.getOpcode() == ISD::Register);
2187   ARMCC::CondCodes CCVal =
2188     (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue();
2189
2190   if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
2191     // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
2192     // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
2193     // Pattern complexity = 18  cost = 1  size = 0
2194     SDValue CPTmp0;
2195     SDValue CPTmp1;
2196     SDValue CPTmp2;
2197     if (Subtarget->isThumb()) {
2198       SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
2199                                         CCVal, CCR, InFlag);
2200       if (!Res)
2201         Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal,
2202                                ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
2203       if (Res)
2204         return Res;
2205     } else {
2206       SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal,
2207                                          CCVal, CCR, InFlag);
2208       if (!Res)
2209         Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal,
2210                                ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
2211       if (Res)
2212         return Res;
2213     }
2214
2215     // Pattern: (ARMcmov:i32 GPR:i32:$false,
2216     //             (imm:i32)<<P:Pred_so_imm>>:$true,
2217     //             (imm:i32):$cc)
2218     // Emits: (MOVCCi:i32 GPR:i32:$false,
2219     //           (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
2220     // Pattern complexity = 10  cost = 1  size = 0
2221     if (Subtarget->isThumb()) {
2222       SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal,
2223                                         CCVal, CCR, InFlag);
2224       if (!Res)
2225         Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal,
2226                                ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
2227       if (Res)
2228         return Res;
2229     } else {
2230       SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal,
2231                                          CCVal, CCR, InFlag);
2232       if (!Res)
2233         Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal,
2234                                ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
2235       if (Res)
2236         return Res;
2237     }
2238   }
2239
2240   // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
2241   // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
2242   // Pattern complexity = 6  cost = 1  size = 0
2243   //
2244   // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
2245   // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
2246   // Pattern complexity = 6  cost = 11  size = 0
2247   //
2248   // Also VMOVScc and VMOVDcc.
2249   SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32);
2250   SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
2251   unsigned Opc = 0;
2252   switch (VT.getSimpleVT().SimpleTy) {
2253   default: assert(false && "Illegal conditional move type!");
2254     break;
2255   case MVT::i32:
2256     Opc = Subtarget->isThumb()
2257       ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
2258       : ARM::MOVCCr;
2259     break;
2260   case MVT::f32:
2261     Opc = ARM::VMOVScc;
2262     break;
2263   case MVT::f64:
2264     Opc = ARM::VMOVDcc;
2265     break;
2266   }
2267   return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
2268 }
2269
2270 SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2271   // The only time a CONCAT_VECTORS operation can have legal types is when
2272   // two 64-bit vectors are concatenated to a 128-bit vector.
2273   EVT VT = N->getValueType(0);
2274   if (!VT.is128BitVector() || N->getNumOperands() != 2)
2275     llvm_unreachable("unexpected CONCAT_VECTORS");
2276   return PairDRegs(VT, N->getOperand(0), N->getOperand(1));
2277 }
2278
2279 SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
2280   DebugLoc dl = N->getDebugLoc();
2281
2282   if (N->isMachineOpcode())
2283     return NULL;   // Already selected.
2284
2285   switch (N->getOpcode()) {
2286   default: break;
2287   case ISD::Constant: {
2288     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2289     bool UseCP = true;
2290     if (Subtarget->hasThumb2())
2291       // Thumb2-aware targets have the MOVT instruction, so all immediates can
2292       // be done with MOV + MOVT, at worst.
2293       UseCP = 0;
2294     else {
2295       if (Subtarget->isThumb()) {
2296         UseCP = (Val > 255 &&                          // MOV
2297                  ~Val > 255 &&                         // MOV + MVN
2298                  !ARM_AM::isThumbImmShiftedVal(Val));  // MOV + LSL
2299       } else
2300         UseCP = (ARM_AM::getSOImmVal(Val) == -1 &&     // MOV
2301                  ARM_AM::getSOImmVal(~Val) == -1 &&    // MVN
2302                  !ARM_AM::isSOImmTwoPartVal(Val));     // two instrs.
2303     }
2304
2305     if (UseCP) {
2306       SDValue CPIdx =
2307         CurDAG->getTargetConstantPool(ConstantInt::get(
2308                                   Type::getInt32Ty(*CurDAG->getContext()), Val),
2309                                       TLI.getPointerTy());
2310
2311       SDNode *ResNode;
2312       if (Subtarget->isThumb1Only()) {
2313         SDValue Pred = getAL(CurDAG);
2314         SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2315         SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2316         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2317                                          Ops, 4);
2318       } else {
2319         SDValue Ops[] = {
2320           CPIdx,
2321           CurDAG->getTargetConstant(0, MVT::i32),
2322           getAL(CurDAG),
2323           CurDAG->getRegister(0, MVT::i32),
2324           CurDAG->getEntryNode()
2325         };
2326         ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2327                                        Ops, 5);
2328       }
2329       ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
2330       return NULL;
2331     }
2332
2333     // Other cases are autogenerated.
2334     break;
2335   }
2336   case ISD::FrameIndex: {
2337     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2338     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2339     SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
2340     if (Subtarget->isThumb1Only()) {
2341       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
2342                         getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
2343       return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4);
2344     } else {
2345       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2346                       ARM::t2ADDri : ARM::ADDri);
2347       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
2348                         getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
2349                         CurDAG->getRegister(0, MVT::i32) };
2350       return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
2351     }
2352   }
2353   case ISD::SRL:
2354     if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2355       return I;
2356     break;
2357   case ISD::SRA:
2358     if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
2359       return I;
2360     break;
2361   case ISD::MUL:
2362     if (Subtarget->isThumb1Only())
2363       break;
2364     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2365       unsigned RHSV = C->getZExtValue();
2366       if (!RHSV) break;
2367       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2368         unsigned ShImm = Log2_32(RHSV-1);
2369         if (ShImm >= 32)
2370           break;
2371         SDValue V = N->getOperand(0);
2372         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2373         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
2374         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2375         if (Subtarget->isThumb()) {
2376           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
2377           return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6);
2378         } else {
2379           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
2380           return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7);
2381         }
2382       }
2383       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2384         unsigned ShImm = Log2_32(RHSV+1);
2385         if (ShImm >= 32)
2386           break;
2387         SDValue V = N->getOperand(0);
2388         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2389         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
2390         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2391         if (Subtarget->isThumb()) {
2392           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
2393           return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
2394         } else {
2395           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
2396           return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7);
2397         }
2398       }
2399     }
2400     break;
2401   case ISD::AND: {
2402     // Check for unsigned bitfield extract
2403     if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
2404       return I;
2405
2406     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2407     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2408     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2409     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2410     // Select it to: "movt x, ((c1 & 0xffff0000) >> 16)"
2411     EVT VT = N->getValueType(0);
2412     if (VT != MVT::i32)
2413       break;
2414     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2415       ? ARM::t2MOVTi16
2416       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2417     if (!Opc)
2418       break;
2419     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2420     ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2421     if (!N1C)
2422       break;
2423     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2424       SDValue N2 = N0.getOperand(1);
2425       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2426       if (!N2C)
2427         break;
2428       unsigned N1CVal = N1C->getZExtValue();
2429       unsigned N2CVal = N2C->getZExtValue();
2430       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2431           (N1CVal & 0xffffU) == 0xffffU &&
2432           (N2CVal & 0xffffU) == 0x0U) {
2433         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2434                                                   MVT::i32);
2435         SDValue Ops[] = { N0.getOperand(0), Imm16,
2436                           getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
2437         return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4);
2438       }
2439     }
2440     break;
2441   }
2442   case ARMISD::VMOVRRD:
2443     return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
2444                                   N->getOperand(0), getAL(CurDAG),
2445                                   CurDAG->getRegister(0, MVT::i32));
2446   case ISD::UMUL_LOHI: {
2447     if (Subtarget->isThumb1Only())
2448       break;
2449     if (Subtarget->isThumb()) {
2450       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2451                         getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
2452                         CurDAG->getRegister(0, MVT::i32) };
2453       return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
2454     } else {
2455       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2456                         getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
2457                         CurDAG->getRegister(0, MVT::i32) };
2458       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2459                                     ARM::UMULL : ARM::UMULLv5,
2460                                     dl, MVT::i32, MVT::i32, Ops, 5);
2461     }
2462   }
2463   case ISD::SMUL_LOHI: {
2464     if (Subtarget->isThumb1Only())
2465       break;
2466     if (Subtarget->isThumb()) {
2467       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2468                         getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
2469       return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
2470     } else {
2471       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2472                         getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
2473                         CurDAG->getRegister(0, MVT::i32) };
2474       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
2475                                     ARM::SMULL : ARM::SMULLv5,
2476                                     dl, MVT::i32, MVT::i32, Ops, 5);
2477     }
2478   }
2479   case ISD::LOAD: {
2480     SDNode *ResNode = 0;
2481     if (Subtarget->isThumb() && Subtarget->hasThumb2())
2482       ResNode = SelectT2IndexedLoad(N);
2483     else
2484       ResNode = SelectARMIndexedLoad(N);
2485     if (ResNode)
2486       return ResNode;
2487     // Other cases are autogenerated.
2488     break;
2489   }
2490   case ARMISD::BRCOND: {
2491     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2492     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2493     // Pattern complexity = 6  cost = 1  size = 0
2494
2495     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2496     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
2497     // Pattern complexity = 6  cost = 1  size = 0
2498
2499     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
2500     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
2501     // Pattern complexity = 6  cost = 1  size = 0
2502
2503     unsigned Opc = Subtarget->isThumb() ?
2504       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
2505     SDValue Chain = N->getOperand(0);
2506     SDValue N1 = N->getOperand(1);
2507     SDValue N2 = N->getOperand(2);
2508     SDValue N3 = N->getOperand(3);
2509     SDValue InFlag = N->getOperand(4);
2510     assert(N1.getOpcode() == ISD::BasicBlock);
2511     assert(N2.getOpcode() == ISD::Constant);
2512     assert(N3.getOpcode() == ISD::Register);
2513
2514     SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
2515                                cast<ConstantSDNode>(N2)->getZExtValue()),
2516                                MVT::i32);
2517     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
2518     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
2519                                              MVT::Glue, Ops, 5);
2520     Chain = SDValue(ResNode, 0);
2521     if (N->getNumValues() == 2) {
2522       InFlag = SDValue(ResNode, 1);
2523       ReplaceUses(SDValue(N, 1), InFlag);
2524     }
2525     ReplaceUses(SDValue(N, 0),
2526                 SDValue(Chain.getNode(), Chain.getResNo()));
2527     return NULL;
2528   }
2529   case ARMISD::CMOV:
2530     return SelectCMOVOp(N);
2531   case ARMISD::VZIP: {
2532     unsigned Opc = 0;
2533     EVT VT = N->getValueType(0);
2534     switch (VT.getSimpleVT().SimpleTy) {
2535     default: return NULL;
2536     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
2537     case MVT::v4i16: Opc = ARM::VZIPd16; break;
2538     case MVT::v2f32:
2539     case MVT::v2i32: Opc = ARM::VZIPd32; break;
2540     case MVT::v16i8: Opc = ARM::VZIPq8; break;
2541     case MVT::v8i16: Opc = ARM::VZIPq16; break;
2542     case MVT::v4f32:
2543     case MVT::v4i32: Opc = ARM::VZIPq32; break;
2544     }
2545     SDValue Pred = getAL(CurDAG);
2546     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2547     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2548     return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
2549   }
2550   case ARMISD::VUZP: {
2551     unsigned Opc = 0;
2552     EVT VT = N->getValueType(0);
2553     switch (VT.getSimpleVT().SimpleTy) {
2554     default: return NULL;
2555     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
2556     case MVT::v4i16: Opc = ARM::VUZPd16; break;
2557     case MVT::v2f32:
2558     case MVT::v2i32: Opc = ARM::VUZPd32; break;
2559     case MVT::v16i8: Opc = ARM::VUZPq8; break;
2560     case MVT::v8i16: Opc = ARM::VUZPq16; break;
2561     case MVT::v4f32:
2562     case MVT::v4i32: Opc = ARM::VUZPq32; break;
2563     }
2564     SDValue Pred = getAL(CurDAG);
2565     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2566     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2567     return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
2568   }
2569   case ARMISD::VTRN: {
2570     unsigned Opc = 0;
2571     EVT VT = N->getValueType(0);
2572     switch (VT.getSimpleVT().SimpleTy) {
2573     default: return NULL;
2574     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
2575     case MVT::v4i16: Opc = ARM::VTRNd16; break;
2576     case MVT::v2f32:
2577     case MVT::v2i32: Opc = ARM::VTRNd32; break;
2578     case MVT::v16i8: Opc = ARM::VTRNq8; break;
2579     case MVT::v8i16: Opc = ARM::VTRNq16; break;
2580     case MVT::v4f32:
2581     case MVT::v4i32: Opc = ARM::VTRNq32; break;
2582     }
2583     SDValue Pred = getAL(CurDAG);
2584     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2585     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
2586     return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
2587   }
2588   case ARMISD::BUILD_VECTOR: {
2589     EVT VecVT = N->getValueType(0);
2590     EVT EltVT = VecVT.getVectorElementType();
2591     unsigned NumElts = VecVT.getVectorNumElements();
2592     if (EltVT == MVT::f64) {
2593       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
2594       return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
2595     }
2596     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
2597     if (NumElts == 2)
2598       return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
2599     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
2600     return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1),
2601                      N->getOperand(2), N->getOperand(3));
2602   }
2603
2604   case ARMISD::VLD2DUP: {
2605     unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
2606                            ARM::VLD2DUPd32Pseudo };
2607     return SelectVLDDup(N, false, 2, Opcodes);
2608   }
2609
2610   case ARMISD::VLD3DUP: {
2611     unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo,
2612                            ARM::VLD3DUPd32Pseudo };
2613     return SelectVLDDup(N, false, 3, Opcodes);
2614   }
2615
2616   case ARMISD::VLD4DUP: {
2617     unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo,
2618                            ARM::VLD4DUPd32Pseudo };
2619     return SelectVLDDup(N, false, 4, Opcodes);
2620   }
2621
2622   case ARMISD::VLD2DUP_UPD: {
2623     unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD,
2624                            ARM::VLD2DUPd32Pseudo_UPD };
2625     return SelectVLDDup(N, true, 2, Opcodes);
2626   }
2627
2628   case ARMISD::VLD3DUP_UPD: {
2629     unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD,
2630                            ARM::VLD3DUPd32Pseudo_UPD };
2631     return SelectVLDDup(N, true, 3, Opcodes);
2632   }
2633
2634   case ARMISD::VLD4DUP_UPD: {
2635     unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD,
2636                            ARM::VLD4DUPd32Pseudo_UPD };
2637     return SelectVLDDup(N, true, 4, Opcodes);
2638   }
2639
2640   case ARMISD::VLD1_UPD: {
2641     unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD,
2642                             ARM::VLD1d32_UPD, ARM::VLD1d64_UPD };
2643     unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD,
2644                             ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
2645     return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
2646   }
2647
2648   case ARMISD::VLD2_UPD: {
2649     unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
2650                             ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
2651     unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
2652                             ARM::VLD2q32Pseudo_UPD };
2653     return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
2654   }
2655
2656   case ARMISD::VLD3_UPD: {
2657     unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
2658                             ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD };
2659     unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
2660                              ARM::VLD3q16Pseudo_UPD,
2661                              ARM::VLD3q32Pseudo_UPD };
2662     unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
2663                              ARM::VLD3q16oddPseudo_UPD,
2664                              ARM::VLD3q32oddPseudo_UPD };
2665     return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
2666   }
2667
2668   case ARMISD::VLD4_UPD: {
2669     unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
2670                             ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD };
2671     unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
2672                              ARM::VLD4q16Pseudo_UPD,
2673                              ARM::VLD4q32Pseudo_UPD };
2674     unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
2675                              ARM::VLD4q16oddPseudo_UPD,
2676                              ARM::VLD4q32oddPseudo_UPD };
2677     return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
2678   }
2679
2680   case ARMISD::VLD2LN_UPD: {
2681     unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd16Pseudo_UPD,
2682                             ARM::VLD2LNd32Pseudo_UPD };
2683     unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
2684                             ARM::VLD2LNq32Pseudo_UPD };
2685     return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
2686   }
2687
2688   case ARMISD::VLD3LN_UPD: {
2689     unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd16Pseudo_UPD,
2690                             ARM::VLD3LNd32Pseudo_UPD };
2691     unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
2692                             ARM::VLD3LNq32Pseudo_UPD };
2693     return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
2694   }
2695
2696   case ARMISD::VLD4LN_UPD: {
2697     unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd16Pseudo_UPD,
2698                             ARM::VLD4LNd32Pseudo_UPD };
2699     unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
2700                             ARM::VLD4LNq32Pseudo_UPD };
2701     return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
2702   }
2703
2704   case ARMISD::VST1_UPD: {
2705     unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD,
2706                             ARM::VST1d32_UPD, ARM::VST1d64_UPD };
2707     unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD,
2708                             ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
2709     return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
2710   }
2711
2712   case ARMISD::VST2_UPD: {
2713     unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
2714                             ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
2715     unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
2716                             ARM::VST2q32Pseudo_UPD };
2717     return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
2718   }
2719
2720   case ARMISD::VST3_UPD: {
2721     unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
2722                             ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
2723     unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
2724                              ARM::VST3q16Pseudo_UPD,
2725                              ARM::VST3q32Pseudo_UPD };
2726     unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
2727                              ARM::VST3q16oddPseudo_UPD,
2728                              ARM::VST3q32oddPseudo_UPD };
2729     return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
2730   }
2731
2732   case ARMISD::VST4_UPD: {
2733     unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
2734                             ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
2735     unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
2736                              ARM::VST4q16Pseudo_UPD,
2737                              ARM::VST4q32Pseudo_UPD };
2738     unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
2739                              ARM::VST4q16oddPseudo_UPD,
2740                              ARM::VST4q32oddPseudo_UPD };
2741     return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
2742   }
2743
2744   case ARMISD::VST2LN_UPD: {
2745     unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd16Pseudo_UPD,
2746                             ARM::VST2LNd32Pseudo_UPD };
2747     unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
2748                             ARM::VST2LNq32Pseudo_UPD };
2749     return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
2750   }
2751
2752   case ARMISD::VST3LN_UPD: {
2753     unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd16Pseudo_UPD,
2754                             ARM::VST3LNd32Pseudo_UPD };
2755     unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
2756                             ARM::VST3LNq32Pseudo_UPD };
2757     return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
2758   }
2759
2760   case ARMISD::VST4LN_UPD: {
2761     unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd16Pseudo_UPD,
2762                             ARM::VST4LNd32Pseudo_UPD };
2763     unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
2764                             ARM::VST4LNq32Pseudo_UPD };
2765     return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
2766   }
2767
2768   case ISD::INTRINSIC_VOID:
2769   case ISD::INTRINSIC_W_CHAIN: {
2770     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2771     switch (IntNo) {
2772     default:
2773       break;
2774
2775     case Intrinsic::arm_ldrexd: {
2776       SDValue MemAddr = N->getOperand(2);
2777       DebugLoc dl = N->getDebugLoc();
2778       SDValue Chain = N->getOperand(0);
2779
2780       unsigned NewOpc = ARM::LDREXD;
2781       if (Subtarget->isThumb() && Subtarget->hasThumb2())
2782         NewOpc = ARM::t2LDREXD;
2783
2784       // arm_ldrexd returns an i64 value in {i32, i32}
2785       std::vector<EVT> ResTys;
2786       ResTys.push_back(MVT::i32);
2787       ResTys.push_back(MVT::i32);
2788       ResTys.push_back(MVT::Other);
2789
2790       // place arguments in the right order
2791       SmallVector<SDValue, 7> Ops;
2792       Ops.push_back(MemAddr);
2793       Ops.push_back(getAL(CurDAG));
2794       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2795       Ops.push_back(Chain);
2796       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
2797                                           Ops.size());
2798       // Transfer memoperands.
2799       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2800       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2801       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
2802
2803       // Until there's support for specifying explicit register constraints
2804       // like the use of even/odd register pair, hardcode ldrexd to always
2805       // use the pair [R0, R1] to hold the load result.
2806       Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R0,
2807                                    SDValue(Ld, 0), SDValue(0,0));
2808       Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R1,
2809                                    SDValue(Ld, 1), Chain.getValue(1));
2810
2811       // Remap uses.
2812       SDValue Glue = Chain.getValue(1);
2813       if (!SDValue(N, 0).use_empty()) {
2814         SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2815                                                 ARM::R0, MVT::i32, Glue);
2816         Glue = Result.getValue(2);
2817         ReplaceUses(SDValue(N, 0), Result);
2818       }
2819       if (!SDValue(N, 1).use_empty()) {
2820         SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2821                                                 ARM::R1, MVT::i32, Glue);
2822         Glue = Result.getValue(2);
2823         ReplaceUses(SDValue(N, 1), Result);
2824       }
2825
2826       ReplaceUses(SDValue(N, 2), SDValue(Ld, 2));
2827       return NULL;
2828     }
2829
2830     case Intrinsic::arm_strexd: {
2831       DebugLoc dl = N->getDebugLoc();
2832       SDValue Chain = N->getOperand(0);
2833       SDValue Val0 = N->getOperand(2);
2834       SDValue Val1 = N->getOperand(3);
2835       SDValue MemAddr = N->getOperand(4);
2836
2837       // Until there's support for specifying explicit register constraints
2838       // like the use of even/odd register pair, hardcode strexd to always
2839       // use the pair [R2, R3] to hold the i64 (i32, i32) value to be stored.
2840       Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R2, Val0,
2841                                    SDValue(0, 0));
2842       Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R3, Val1, Chain.getValue(1));
2843
2844       SDValue Glue = Chain.getValue(1);
2845       Val0 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2846                                     ARM::R2, MVT::i32, Glue);
2847       Glue = Val0.getValue(1);
2848       Val1 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
2849                                     ARM::R3, MVT::i32, Glue);
2850
2851       // Store exclusive double returns an i32 value which is the return status
2852       // of the issued store.
2853       std::vector<EVT> ResTys;
2854       ResTys.push_back(MVT::i32);
2855       ResTys.push_back(MVT::Other);
2856
2857       // place arguments in the right order
2858       SmallVector<SDValue, 7> Ops;
2859       Ops.push_back(Val0);
2860       Ops.push_back(Val1);
2861       Ops.push_back(MemAddr);
2862       Ops.push_back(getAL(CurDAG));
2863       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2864       Ops.push_back(Chain);
2865
2866       unsigned NewOpc = ARM::STREXD;
2867       if (Subtarget->isThumb() && Subtarget->hasThumb2())
2868         NewOpc = ARM::t2STREXD;
2869
2870       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
2871                                           Ops.size());
2872       // Transfer memoperands.
2873       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2874       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2875       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
2876
2877       return St;
2878     }
2879
2880     case Intrinsic::arm_neon_vld1: {
2881       unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
2882                               ARM::VLD1d32, ARM::VLD1d64 };
2883       unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
2884                               ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
2885       return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
2886     }
2887
2888     case Intrinsic::arm_neon_vld2: {
2889       unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
2890                               ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
2891       unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
2892                               ARM::VLD2q32Pseudo };
2893       return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
2894     }
2895
2896     case Intrinsic::arm_neon_vld3: {
2897       unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo,
2898                               ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo };
2899       unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
2900                                ARM::VLD3q16Pseudo_UPD,
2901                                ARM::VLD3q32Pseudo_UPD };
2902       unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo,
2903                                ARM::VLD3q16oddPseudo,
2904                                ARM::VLD3q32oddPseudo };
2905       return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
2906     }
2907
2908     case Intrinsic::arm_neon_vld4: {
2909       unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo,
2910                               ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo };
2911       unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
2912                                ARM::VLD4q16Pseudo_UPD,
2913                                ARM::VLD4q32Pseudo_UPD };
2914       unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo,
2915                                ARM::VLD4q16oddPseudo,
2916                                ARM::VLD4q32oddPseudo };
2917       return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
2918     }
2919
2920     case Intrinsic::arm_neon_vld2lane: {
2921       unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
2922                               ARM::VLD2LNd32Pseudo };
2923       unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
2924       return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
2925     }
2926
2927     case Intrinsic::arm_neon_vld3lane: {
2928       unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
2929                               ARM::VLD3LNd32Pseudo };
2930       unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
2931       return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
2932     }
2933
2934     case Intrinsic::arm_neon_vld4lane: {
2935       unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
2936                               ARM::VLD4LNd32Pseudo };
2937       unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
2938       return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
2939     }
2940
2941     case Intrinsic::arm_neon_vst1: {
2942       unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
2943                               ARM::VST1d32, ARM::VST1d64 };
2944       unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
2945                               ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
2946       return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
2947     }
2948
2949     case Intrinsic::arm_neon_vst2: {
2950       unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
2951                               ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
2952       unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
2953                               ARM::VST2q32Pseudo };
2954       return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
2955     }
2956
2957     case Intrinsic::arm_neon_vst3: {
2958       unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo,
2959                               ARM::VST3d32Pseudo, ARM::VST1d64TPseudo };
2960       unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
2961                                ARM::VST3q16Pseudo_UPD,
2962                                ARM::VST3q32Pseudo_UPD };
2963       unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo,
2964                                ARM::VST3q16oddPseudo,
2965                                ARM::VST3q32oddPseudo };
2966       return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
2967     }
2968
2969     case Intrinsic::arm_neon_vst4: {
2970       unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
2971                               ARM::VST4d32Pseudo, ARM::VST1d64QPseudo };
2972       unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
2973                                ARM::VST4q16Pseudo_UPD,
2974                                ARM::VST4q32Pseudo_UPD };
2975       unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo,
2976                                ARM::VST4q16oddPseudo,
2977                                ARM::VST4q32oddPseudo };
2978       return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
2979     }
2980
2981     case Intrinsic::arm_neon_vst2lane: {
2982       unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
2983                               ARM::VST2LNd32Pseudo };
2984       unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
2985       return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
2986     }
2987
2988     case Intrinsic::arm_neon_vst3lane: {
2989       unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
2990                               ARM::VST3LNd32Pseudo };
2991       unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
2992       return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
2993     }
2994
2995     case Intrinsic::arm_neon_vst4lane: {
2996       unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
2997                               ARM::VST4LNd32Pseudo };
2998       unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
2999       return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3000     }
3001     }
3002     break;
3003   }
3004
3005   case ISD::INTRINSIC_WO_CHAIN: {
3006     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3007     switch (IntNo) {
3008     default:
3009       break;
3010
3011     case Intrinsic::arm_neon_vtbl2:
3012       return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo);
3013     case Intrinsic::arm_neon_vtbl3:
3014       return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3015     case Intrinsic::arm_neon_vtbl4:
3016       return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3017
3018     case Intrinsic::arm_neon_vtbx2:
3019       return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo);
3020     case Intrinsic::arm_neon_vtbx3:
3021       return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3022     case Intrinsic::arm_neon_vtbx4:
3023       return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3024     }
3025     break;
3026   }
3027
3028   case ARMISD::VTBL1: {
3029     DebugLoc dl = N->getDebugLoc();
3030     EVT VT = N->getValueType(0);
3031     SmallVector<SDValue, 6> Ops;
3032
3033     Ops.push_back(N->getOperand(0));
3034     Ops.push_back(N->getOperand(1));
3035     Ops.push_back(getAL(CurDAG));                    // Predicate
3036     Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3037     return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size());
3038   }
3039   case ARMISD::VTBL2: {
3040     DebugLoc dl = N->getDebugLoc();
3041     EVT VT = N->getValueType(0);
3042
3043     // Form a REG_SEQUENCE to force register allocation.
3044     SDValue V0 = N->getOperand(0);
3045     SDValue V1 = N->getOperand(1);
3046     SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
3047
3048     SmallVector<SDValue, 6> Ops;
3049     Ops.push_back(RegSeq);
3050     Ops.push_back(N->getOperand(2));
3051     Ops.push_back(getAL(CurDAG));                    // Predicate
3052     Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
3053     return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT,
3054                                   Ops.data(), Ops.size());
3055   }
3056
3057   case ISD::CONCAT_VECTORS:
3058     return SelectConcatVector(N);
3059   }
3060
3061   return SelectCode(N);
3062 }
3063
3064 bool ARMDAGToDAGISel::
3065 SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
3066                              std::vector<SDValue> &OutOps) {
3067   assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
3068   // Require the address to be in a register.  That is safe for all ARM
3069   // variants and it is hard to do anything much smarter without knowing
3070   // how the operand is used.
3071   OutOps.push_back(Op);
3072   return false;
3073 }
3074
3075 /// createARMISelDag - This pass converts a legalized DAG into a
3076 /// ARM-specific DAG, ready for instruction scheduling.
3077 ///
3078 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
3079                                      CodeGenOpt::Level OptLevel) {
3080   return new ARMDAGToDAGISel(TM, OptLevel);
3081 }