lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

   1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines an instruction selector for the AArch64 target.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #define DEBUG_TYPE "aarch64-isel"
  15 #include "AArch64.h"
  16 #include "AArch64InstrInfo.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "Utils/AArch64BaseInfo.h"
  20 #include "llvm/ADT/APSInt.h"
  21 #include "llvm/CodeGen/SelectionDAGISel.h"
  22 #include "llvm/IR/GlobalValue.h"
  23 #include "llvm/Support/Debug.h"
  24 #include "llvm/Support/raw_ostream.h"
  25
  26 using namespace llvm;
  27
  28 //===--------------------------------------------------------------------===//
  29 /// AArch64 specific code to select AArch64 machine instructions for
  30 /// SelectionDAG operations.
  31 ///
  32 namespace {
  33
  34 class AArch64DAGToDAGISel : public SelectionDAGISel {
  35   AArch64TargetMachine &TM;
  36
  37   /// Keep a pointer to the AArch64Subtarget around so that we can
  38   /// make the right decision when generating code for different targets.
  39   const AArch64Subtarget *Subtarget;
  40
  41 public:
  42   explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
  43                                CodeGenOpt::Level OptLevel)
  44     : SelectionDAGISel(tm, OptLevel), TM(tm),
  45       Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
  46   }
  47
  48   virtual const char *getPassName() const {
  49     return "AArch64 Instruction Selection";
  50   }
  51
  52   // Include the pieces autogenerated from the target description.
  53 #include "AArch64GenDAGISel.inc"
  54
  55   template<unsigned MemSize>
  56   bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
  57     const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  58     if (!CN || CN->getZExtValue() % MemSize != 0
  59         || CN->getZExtValue() / MemSize > 0xfff)
  60       return false;
  61
  62     UImm12 =  CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
  63     return true;
  64   }
  65
  66   template<unsigned RegWidth>
  67   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
  68     return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  69   }
  70
  71   /// Used for pre-lowered address-reference nodes, so we already know
  72   /// the fields match. This operand's job is simply to add an
  73   /// appropriate shift operand to the MOVZ/MOVK instruction.
  74   template<unsigned LogShift>
  75   bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) {
  76     Imm = N;
  77     Shift = CurDAG->getTargetConstant(LogShift, MVT::i32);
  78     return true;
  79   }
  80
  81   bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
  82
  83   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
  84                                 unsigned RegWidth);
  85
  86   bool SelectInlineAsmMemoryOperand(const SDValue &Op,
  87                                     char ConstraintCode,
  88                                     std::vector<SDValue> &OutOps);
  89
  90   bool SelectLogicalImm(SDValue N, SDValue &Imm);
  91
  92   template<unsigned RegWidth>
  93   bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
  94     return SelectTSTBOperand(N, FixedPos, RegWidth);
  95   }
  96
  97   bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
  98
  99   SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
 100                        unsigned Op64);
 101
 102   /// Put the given constant into a pool and return a DAG which will give its
 103   /// address.
 104   SDValue getConstantPoolItemAddress(SDLoc DL, const Constant *CV);
 105
 106   SDNode *TrySelectToMoveImm(SDNode *N);
 107   SDNode *LowerToFPLitPool(SDNode *Node);
 108   SDNode *SelectToLitPool(SDNode *N);
 109
 110   SDNode* Select(SDNode*);
 111 private:
 112   /// Select NEON load intrinsics.  NumVecs should be 1, 2, 3 or 4.
 113   SDNode *SelectVLD(SDNode *N, unsigned NumVecs, const uint16_t *Opcode);
 114
 115   /// Select NEON store intrinsics.  NumVecs should be 1, 2, 3 or 4.
 116   SDNode *SelectVST(SDNode *N, unsigned NumVecs, const uint16_t *Opcodes);
 117
 118   // Form pairs of consecutive 64-bit/128-bit registers.
 119   SDNode *createDPairNode(SDValue V0, SDValue V1);
 120   SDNode *createQPairNode(SDValue V0, SDValue V1);
 121
 122   // Form sequences of 3 consecutive 64-bit/128-bit registers.
 123   SDNode *createDTripleNode(SDValue V0, SDValue V1, SDValue V2);
 124   SDNode *createQTripleNode(SDValue V0, SDValue V1, SDValue V2);
 125
 126   // Form sequences of 4 consecutive 64-bit/128-bit registers.
 127   SDNode *createDQuadNode(SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 128   SDNode *createQQuadNode(SDValue V0, SDValue V1, SDValue V2, SDValue V3);
 129 };
 130 }
 131
 132 bool
 133 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
 134                                               unsigned RegWidth) {
 135   const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
 136   if (!CN) return false;
 137
 138   // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
 139   // is between 1 and 32 for a destination w-register, or 1 and 64 for an
 140   // x-register.
 141   //
 142   // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
 143   // want THIS_NODE to be 2^fbits. This is much easier to deal with using
 144   // integers.
 145   bool IsExact;
 146
 147   // fbits is between 1 and 64 in the worst-case, which means the fmul
 148   // could have 2^64 as an actual operand. Need 65 bits of precision.
 149   APSInt IntVal(65, true);
 150   CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
 151
 152   // N.b. isPowerOf2 also checks for > 0.
 153   if (!IsExact || !IntVal.isPowerOf2()) return false;
 154   unsigned FBits = IntVal.logBase2();
 155
 156   // Checks above should have guaranteed that we haven't lost information in
 157   // finding FBits, but it must still be in range.
 158   if (FBits == 0 || FBits > RegWidth) return false;
 159
 160   FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
 161   return true;
 162 }
 163
 164 bool
 165 AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
 166                                                  char ConstraintCode,
 167                                                  std::vector<SDValue> &OutOps) {
 168   switch (ConstraintCode) {
 169   default: llvm_unreachable("Unrecognised AArch64 memory constraint");
 170   case 'm':
 171     // FIXME: more freedom is actually permitted for 'm'. We can go
 172     // hunting for a base and an offset if we want. Of course, since
 173     // we don't really know how the operand is going to be used we're
 174     // probably restricted to the load/store pair's simm7 as an offset
 175     // range anyway.
 176   case 'Q':
 177     OutOps.push_back(Op);
 178   }
 179
 180   return false;
 181 }
 182
 183 bool
 184 AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
 185   ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
 186   if (!Imm || !Imm->getValueAPF().isPosZero())
 187     return false;
 188
 189   // Doesn't actually carry any information, but keeps TableGen quiet.
 190   Dummy = CurDAG->getTargetConstant(0, MVT::i32);
 191   return true;
 192 }
 193
 194 bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
 195   uint32_t Bits;
 196   uint32_t RegWidth = N.getValueType().getSizeInBits();
 197
 198   ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
 199   if (!CN) return false;
 200
 201   if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
 202     return false;
 203
 204   Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
 205   return true;
 206 }
 207
 208 SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
 209   SDNode *ResNode;
 210   SDLoc dl(Node);
 211   EVT DestType = Node->getValueType(0);
 212   unsigned DestWidth = DestType.getSizeInBits();
 213
 214   unsigned MOVOpcode;
 215   EVT MOVType;
 216   int UImm16, Shift;
 217   uint32_t LogicalBits;
 218
 219   uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
 220   if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
 221     MOVType = DestType;
 222     MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
 223   } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
 224     MOVType = DestType;
 225     MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
 226   } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
 227     // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can
 228     // use a 32-bit instruction: "movn w0, 0xedbc".
 229     MOVType = MVT::i32;
 230     MOVOpcode = AArch64::MOVNwii;
 231   } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits))  {
 232     MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
 233     uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;
 234
 235     return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
 236                               CurDAG->getRegister(ZR, DestType),
 237                               CurDAG->getTargetConstant(LogicalBits, MVT::i32));
 238   } else {
 239     // Can't handle it in one instruction. There's scope for permitting two (or
 240     // more) instructions, but that'll need more thought.
 241     return NULL;
 242   }
 243
 244   ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
 245                                    CurDAG->getTargetConstant(UImm16, MVT::i32),
 246                                    CurDAG->getTargetConstant(Shift, MVT::i32));
 247
 248   if (MOVType != DestType) {
 249     ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
 250                           MVT::i64, MVT::i32, MVT::Other,
 251                           CurDAG->getTargetConstant(0, MVT::i64),
 252                           SDValue(ResNode, 0),
 253                           CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
 254   }
 255
 256   return ResNode;
 257 }
 258
 259 SDValue
 260 AArch64DAGToDAGISel::getConstantPoolItemAddress(SDLoc DL,
 261                                                 const Constant *CV) {
 262   EVT PtrVT = getTargetLowering()->getPointerTy();
 263
 264   switch (getTargetLowering()->getTargetMachine().getCodeModel()) {
 265   case CodeModel::Small: {
 266     unsigned Alignment =
 267       getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
 268     return CurDAG->getNode(
 269         AArch64ISD::WrapperSmall, DL, PtrVT,
 270         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG),
 271         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12),
 272         CurDAG->getConstant(Alignment, MVT::i32));
 273   }
 274   case CodeModel::Large: {
 275     SDNode *LitAddr;
 276     LitAddr = CurDAG->getMachineNode(
 277         AArch64::MOVZxii, DL, PtrVT,
 278         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
 279         CurDAG->getTargetConstant(3, MVT::i32));
 280     LitAddr = CurDAG->getMachineNode(
 281         AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
 282         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
 283         CurDAG->getTargetConstant(2, MVT::i32));
 284     LitAddr = CurDAG->getMachineNode(
 285         AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
 286         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
 287         CurDAG->getTargetConstant(1, MVT::i32));
 288     LitAddr = CurDAG->getMachineNode(
 289         AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
 290         CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC),
 291         CurDAG->getTargetConstant(0, MVT::i32));
 292     return SDValue(LitAddr, 0);
 293   }
 294   default:
 295     llvm_unreachable("Only small and large code models supported now");
 296   }
 297 }
 298
 299 SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
 300   SDLoc DL(Node);
 301   uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
 302   int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
 303   EVT DestType = Node->getValueType(0);
 304
 305   // Since we may end up loading a 64-bit constant from a 32-bit entry the
 306   // constant in the pool may have a different type to the eventual node.
 307   ISD::LoadExtType Extension;
 308   EVT MemType;
 309
 310   assert((DestType == MVT::i64 || DestType == MVT::i32)
 311          && "Only expect integer constants at the moment");
 312
 313   if (DestType == MVT::i32) {
 314     Extension = ISD::NON_EXTLOAD;
 315     MemType = MVT::i32;
 316   } else if (UnsignedVal <= UINT32_MAX) {
 317     Extension = ISD::ZEXTLOAD;
 318     MemType = MVT::i32;
 319   } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
 320     Extension = ISD::SEXTLOAD;
 321     MemType = MVT::i32;
 322   } else {
 323     Extension = ISD::NON_EXTLOAD;
 324     MemType = MVT::i64;
 325   }
 326
 327   Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
 328                                                   MemType.getSizeInBits()),
 329                                   UnsignedVal);
 330   SDValue PoolAddr = getConstantPoolItemAddress(DL, CV);
 331   unsigned Alignment =
 332     getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
 333
 334   return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
 335                             PoolAddr,
 336                             MachinePointerInfo::getConstantPool(), MemType,
 337                             /* isVolatile = */ false,
 338                             /* isNonTemporal = */ false,
 339                             Alignment).getNode();
 340 }
 341
 342 SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
 343   SDLoc DL(Node);
 344   const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
 345   EVT DestType = Node->getValueType(0);
 346
 347   unsigned Alignment =
 348     getTargetLowering()->getDataLayout()->getABITypeAlignment(FV->getType());
 349   SDValue PoolAddr = getConstantPoolItemAddress(DL, FV);
 350
 351   return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
 352                          MachinePointerInfo::getConstantPool(),
 353                          /* isVolatile = */ false,
 354                          /* isNonTemporal = */ false,
 355                          /* isInvariant = */ true,
 356                          Alignment).getNode();
 357 }
 358
 359 bool
 360 AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
 361                                        unsigned RegWidth) {
 362   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
 363   if (!CN) return false;
 364
 365   uint64_t Val = CN->getZExtValue();
 366
 367   if (!isPowerOf2_64(Val)) return false;
 368
 369   unsigned TestedBit = Log2_64(Val);
 370   // Checks above should have guaranteed that we haven't lost information in
 371   // finding TestedBit, but it must still be in range.
 372   if (TestedBit >= RegWidth) return false;
 373
 374   FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
 375   return true;
 376 }
 377
 378 SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
 379                                           unsigned Op16,unsigned Op32,
 380                                           unsigned Op64) {
 381   // Mostly direct translation to the given operations, except that we preserve
 382   // the AtomicOrdering for use later on.
 383   AtomicSDNode *AN = cast<AtomicSDNode>(Node);
 384   EVT VT = AN->getMemoryVT();
 385
 386   unsigned Op;
 387   if (VT == MVT::i8)
 388     Op = Op8;
 389   else if (VT == MVT::i16)
 390     Op = Op16;
 391   else if (VT == MVT::i32)
 392     Op = Op32;
 393   else if (VT == MVT::i64)
 394     Op = Op64;
 395   else
 396     llvm_unreachable("Unexpected atomic operation");
 397
 398   SmallVector<SDValue, 4> Ops;
 399   for (unsigned i = 1; i < AN->getNumOperands(); ++i)
 400       Ops.push_back(AN->getOperand(i));
 401
 402   Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
 403   Ops.push_back(AN->getOperand(0)); // Chain moves to the end
 404
 405   return CurDAG->SelectNodeTo(Node, Op,
 406                               AN->getValueType(0), MVT::Other,
 407                               &Ops[0], Ops.size());
 408 }
 409
 410 SDNode *AArch64DAGToDAGISel::createDPairNode(SDValue V0, SDValue V1) {
 411   SDLoc dl(V0.getNode());
 412   SDValue RegClass =
 413       CurDAG->getTargetConstant(AArch64::DPairRegClassID, MVT::i32);
 414   SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::dsub_0, MVT::i32);
 415   SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::dsub_1, MVT::i32);
 416   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
 417   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v2i64,
 418                                 Ops);
 419 }
 420
 421 SDNode *AArch64DAGToDAGISel::createQPairNode(SDValue V0, SDValue V1) {
 422   SDLoc dl(V0.getNode());
 423   SDValue RegClass =
 424       CurDAG->getTargetConstant(AArch64::QPairRegClassID, MVT::i32);
 425   SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::qsub_0, MVT::i32);
 426   SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::qsub_1, MVT::i32);
 427   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
 428   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v4i64,
 429                                 Ops);
 430 }
 431
 432 SDNode *AArch64DAGToDAGISel::createDTripleNode(SDValue V0, SDValue V1,
 433                                                SDValue V2) {
 434   SDLoc dl(V0.getNode());
 435   SDValue RegClass =
 436       CurDAG->getTargetConstant(AArch64::DTripleRegClassID, MVT::i32);
 437   SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::dsub_0, MVT::i32);
 438   SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::dsub_1, MVT::i32);
 439   SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::dsub_2, MVT::i32);
 440   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2 };
 441   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped,
 442                                 Ops);
 443 }
 444
 445 SDNode *AArch64DAGToDAGISel::createQTripleNode(SDValue V0, SDValue V1,
 446                                                SDValue V2) {
 447   SDLoc dl(V0.getNode());
 448   SDValue RegClass =
 449       CurDAG->getTargetConstant(AArch64::QTripleRegClassID, MVT::i32);
 450   SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::qsub_0, MVT::i32);
 451   SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::qsub_1, MVT::i32);
 452   SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::qsub_2, MVT::i32);
 453   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2 };
 454   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped,
 455                                 Ops);
 456 }
 457
 458 SDNode *AArch64DAGToDAGISel::createDQuadNode(SDValue V0, SDValue V1, SDValue V2,
 459                                              SDValue V3) {
 460   SDLoc dl(V0.getNode());
 461   SDValue RegClass =
 462       CurDAG->getTargetConstant(AArch64::DQuadRegClassID, MVT::i32);
 463   SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::dsub_0, MVT::i32);
 464   SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::dsub_1, MVT::i32);
 465   SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::dsub_2, MVT::i32);
 466   SDValue SubReg3 = CurDAG->getTargetConstant(AArch64::dsub_3, MVT::i32);
 467   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2,  V3,
 468                           SubReg3 };
 469   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v4i64,
 470                                 Ops);
 471 }
 472
 473 SDNode *AArch64DAGToDAGISel::createQQuadNode(SDValue V0, SDValue V1, SDValue V2,
 474                                              SDValue V3) {
 475   SDLoc dl(V0.getNode());
 476   SDValue RegClass =
 477       CurDAG->getTargetConstant(AArch64::QQuadRegClassID, MVT::i32);
 478   SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::qsub_0, MVT::i32);
 479   SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::qsub_1, MVT::i32);
 480   SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::qsub_2, MVT::i32);
 481   SDValue SubReg3 = CurDAG->getTargetConstant(AArch64::qsub_3, MVT::i32);
 482   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2,  V3,
 483                           SubReg3 };
 484   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v8i64,
 485                                 Ops);
 486 }
 487
 488 SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
 489                                        const uint16_t *Opcodes) {
 490   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
 491
 492   EVT VT = N->getValueType(0);
 493   unsigned OpcodeIndex;
 494   switch (VT.getSimpleVT().SimpleTy) {
 495   default: llvm_unreachable("unhandled vector load type");
 496   case MVT::v8i8:  OpcodeIndex = 0; break;
 497   case MVT::v4i16: OpcodeIndex = 1; break;
 498   case MVT::v2f32:
 499   case MVT::v2i32: OpcodeIndex = 2; break;
 500   case MVT::v1f64:
 501   case MVT::v1i64: OpcodeIndex = 3; break;
 502   case MVT::v16i8: OpcodeIndex = 4; break;
 503   case MVT::v8f16:
 504   case MVT::v8i16: OpcodeIndex = 5; break;
 505   case MVT::v4f32:
 506   case MVT::v4i32: OpcodeIndex = 6; break;
 507   case MVT::v2f64:
 508   case MVT::v2i64: OpcodeIndex = 7; break;
 509   }
 510   unsigned Opc = Opcodes[OpcodeIndex];
 511
 512   SmallVector<SDValue, 2> Ops;
 513   Ops.push_back(N->getOperand(2)); // Push back the Memory Address
 514   Ops.push_back(N->getOperand(0)); // Push back the Chain
 515
 516   std::vector<EVT> ResTys;
 517   bool is64BitVector = VT.is64BitVector();
 518
 519   if (NumVecs == 1)
 520     ResTys.push_back(VT);
 521   else if (NumVecs == 3)
 522     ResTys.push_back(MVT::Untyped);
 523   else {
 524     EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
 525                                  is64BitVector ? NumVecs : NumVecs * 2);
 526     ResTys.push_back(ResTy);
 527   }
 528
 529   ResTys.push_back(MVT::Other); // Type of the Chain
 530   SDLoc dl(N);
 531   SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
 532
 533   // Transfer memoperands.
 534   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
 535   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
 536   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
 537
 538   if (NumVecs == 1)
 539     return VLd;
 540
 541   // If NumVecs > 1, the return result is a super register containing 2-4
 542   // consecutive vector registers.
 543   SDValue SuperReg = SDValue(VLd, 0);
 544
 545   unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
 546   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
 547     ReplaceUses(SDValue(N, Vec),
 548                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
 549   // Update users of the Chain
 550   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
 551
 552   return NULL;
 553 }
 554
 555 SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
 556                                        const uint16_t *Opcodes) {
 557   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
 558   SDLoc dl(N);
 559
 560   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
 561   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
 562
 563   unsigned Vec0Idx = 3;
 564   EVT VT = N->getOperand(Vec0Idx).getValueType();
 565   unsigned OpcodeIndex;
 566   switch (VT.getSimpleVT().SimpleTy) {
 567   default: llvm_unreachable("unhandled vector store type");
 568   case MVT::v8i8:  OpcodeIndex = 0; break;
 569   case MVT::v4i16: OpcodeIndex = 1; break;
 570   case MVT::v2f32:
 571   case MVT::v2i32: OpcodeIndex = 2; break;
 572   case MVT::v1f64:
 573   case MVT::v1i64: OpcodeIndex = 3; break;
 574   case MVT::v16i8: OpcodeIndex = 4; break;
 575   case MVT::v8f16:
 576   case MVT::v8i16: OpcodeIndex = 5; break;
 577   case MVT::v4f32:
 578   case MVT::v4i32: OpcodeIndex = 6; break;
 579   case MVT::v2f64:
 580   case MVT::v2i64: OpcodeIndex = 7; break;
 581   }
 582   unsigned Opc = Opcodes[OpcodeIndex];
 583
 584   std::vector<EVT> ResTys;
 585   ResTys.push_back(MVT::Other); // Type for the Chain
 586
 587   SmallVector<SDValue, 6> Ops;
 588   Ops.push_back(N->getOperand(2)); // Push back the Memory Address
 589
 590   bool is64BitVector = VT.is64BitVector();
 591
 592   SDValue V0 = N->getOperand(Vec0Idx + 0);
 593   SDValue SrcReg;
 594   if (NumVecs == 1)
 595     SrcReg = V0;
 596   else {
 597     SDValue V1 = N->getOperand(Vec0Idx + 1);
 598     if (NumVecs == 2)
 599       SrcReg = is64BitVector ? SDValue(createDPairNode(V0, V1), 0)
 600                              : SDValue(createQPairNode(V0, V1), 0);
 601     else {
 602       SDValue V2 = N->getOperand(Vec0Idx + 2);
 603       if (NumVecs == 3)
 604         SrcReg = is64BitVector ? SDValue(createDTripleNode(V0, V1, V2), 0)
 605                                : SDValue(createQTripleNode(V0, V1, V2), 0);
 606       else {
 607         SDValue V3 = N->getOperand(Vec0Idx + 3);
 608         SrcReg = is64BitVector ? SDValue(createDQuadNode(V0, V1, V2, V3), 0)
 609                                : SDValue(createQQuadNode(V0, V1, V2, V3), 0);
 610       }
 611     }
 612   }
 613   Ops.push_back(SrcReg);
 614
 615   // Push back the Chain
 616   Ops.push_back(N->getOperand(0));
 617
 618   // Transfer memoperands.
 619   SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
 620   cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
 621
 622   return VSt;
 623 }
 624
 625 SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
 626   // Dump information about the Node being selected
 627   DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
 628
 629   if (Node->isMachineOpcode()) {
 630     DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
 631     Node->setNodeId(-1);
 632     return NULL;
 633   }
 634
 635   switch (Node->getOpcode()) {
 636   case ISD::ATOMIC_LOAD_ADD:
 637     return SelectAtomic(Node,
 638                         AArch64::ATOMIC_LOAD_ADD_I8,
 639                         AArch64::ATOMIC_LOAD_ADD_I16,
 640                         AArch64::ATOMIC_LOAD_ADD_I32,
 641                         AArch64::ATOMIC_LOAD_ADD_I64);
 642   case ISD::ATOMIC_LOAD_SUB:
 643     return SelectAtomic(Node,
 644                         AArch64::ATOMIC_LOAD_SUB_I8,
 645                         AArch64::ATOMIC_LOAD_SUB_I16,
 646                         AArch64::ATOMIC_LOAD_SUB_I32,
 647                         AArch64::ATOMIC_LOAD_SUB_I64);
 648   case ISD::ATOMIC_LOAD_AND:
 649     return SelectAtomic(Node,
 650                         AArch64::ATOMIC_LOAD_AND_I8,
 651                         AArch64::ATOMIC_LOAD_AND_I16,
 652                         AArch64::ATOMIC_LOAD_AND_I32,
 653                         AArch64::ATOMIC_LOAD_AND_I64);
 654   case ISD::ATOMIC_LOAD_OR:
 655     return SelectAtomic(Node,
 656                         AArch64::ATOMIC_LOAD_OR_I8,
 657                         AArch64::ATOMIC_LOAD_OR_I16,
 658                         AArch64::ATOMIC_LOAD_OR_I32,
 659                         AArch64::ATOMIC_LOAD_OR_I64);
 660   case ISD::ATOMIC_LOAD_XOR:
 661     return SelectAtomic(Node,
 662                         AArch64::ATOMIC_LOAD_XOR_I8,
 663                         AArch64::ATOMIC_LOAD_XOR_I16,
 664                         AArch64::ATOMIC_LOAD_XOR_I32,
 665                         AArch64::ATOMIC_LOAD_XOR_I64);
 666   case ISD::ATOMIC_LOAD_NAND:
 667     return SelectAtomic(Node,
 668                         AArch64::ATOMIC_LOAD_NAND_I8,
 669                         AArch64::ATOMIC_LOAD_NAND_I16,
 670                         AArch64::ATOMIC_LOAD_NAND_I32,
 671                         AArch64::ATOMIC_LOAD_NAND_I64);
 672   case ISD::ATOMIC_LOAD_MIN:
 673     return SelectAtomic(Node,
 674                         AArch64::ATOMIC_LOAD_MIN_I8,
 675                         AArch64::ATOMIC_LOAD_MIN_I16,
 676                         AArch64::ATOMIC_LOAD_MIN_I32,
 677                         AArch64::ATOMIC_LOAD_MIN_I64);
 678   case ISD::ATOMIC_LOAD_MAX:
 679     return SelectAtomic(Node,
 680                         AArch64::ATOMIC_LOAD_MAX_I8,
 681                         AArch64::ATOMIC_LOAD_MAX_I16,
 682                         AArch64::ATOMIC_LOAD_MAX_I32,
 683                         AArch64::ATOMIC_LOAD_MAX_I64);
 684   case ISD::ATOMIC_LOAD_UMIN:
 685     return SelectAtomic(Node,
 686                         AArch64::ATOMIC_LOAD_UMIN_I8,
 687                         AArch64::ATOMIC_LOAD_UMIN_I16,
 688                         AArch64::ATOMIC_LOAD_UMIN_I32,
 689                         AArch64::ATOMIC_LOAD_UMIN_I64);
 690   case ISD::ATOMIC_LOAD_UMAX:
 691     return SelectAtomic(Node,
 692                         AArch64::ATOMIC_LOAD_UMAX_I8,
 693                         AArch64::ATOMIC_LOAD_UMAX_I16,
 694                         AArch64::ATOMIC_LOAD_UMAX_I32,
 695                         AArch64::ATOMIC_LOAD_UMAX_I64);
 696   case ISD::ATOMIC_SWAP:
 697     return SelectAtomic(Node,
 698                         AArch64::ATOMIC_SWAP_I8,
 699                         AArch64::ATOMIC_SWAP_I16,
 700                         AArch64::ATOMIC_SWAP_I32,
 701                         AArch64::ATOMIC_SWAP_I64);
 702   case ISD::ATOMIC_CMP_SWAP:
 703     return SelectAtomic(Node,
 704                         AArch64::ATOMIC_CMP_SWAP_I8,
 705                         AArch64::ATOMIC_CMP_SWAP_I16,
 706                         AArch64::ATOMIC_CMP_SWAP_I32,
 707                         AArch64::ATOMIC_CMP_SWAP_I64);
 708   case ISD::FrameIndex: {
 709     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
 710     EVT PtrTy = getTargetLowering()->getPointerTy();
 711     SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
 712     return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
 713                                 TFI, CurDAG->getTargetConstant(0, PtrTy));
 714   }
 715   case ISD::ConstantPool: {
 716     // Constant pools are fine, just create a Target entry.
 717     ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
 718     const Constant *C = CN->getConstVal();
 719     SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));
 720
 721     ReplaceUses(SDValue(Node, 0), CP);
 722     return NULL;
 723   }
 724   case ISD::Constant: {
 725     SDNode *ResNode = 0;
 726     if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
 727       // XZR and WZR are probably even better than an actual move: most of the
 728       // time they can be folded into another instruction with *no* cost.
 729
 730       EVT Ty = Node->getValueType(0);
 731       assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
 732       uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
 733       ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
 734                                        SDLoc(Node),
 735                                        Register, Ty).getNode();
 736     }
 737
 738     // Next best option is a move-immediate, see if we can do that.
 739     if (!ResNode) {
 740       ResNode = TrySelectToMoveImm(Node);
 741     }
 742
 743     if (ResNode)
 744       return ResNode;
 745
 746     // If even that fails we fall back to a lit-pool entry at the moment. Future
 747     // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions.
 748     ResNode = SelectToLitPool(Node);
 749     assert(ResNode && "We need *some* way to materialise a constant");
 750
 751     // We want to continue selection at this point since the litpool access
 752     // generated used generic nodes for simplicity.
 753     ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
 754     Node = ResNode;
 755     break;
 756   }
 757   case ISD::ConstantFP: {
 758     if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
 759       // FMOV will take care of it from TableGen
 760       break;
 761     }
 762
 763     SDNode *ResNode = LowerToFPLitPool(Node);
 764     ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
 765
 766     // We want to continue selection at this point since the litpool access
 767     // generated used generic nodes for simplicity.
 768     Node = ResNode;
 769     break;
 770   }
 771   case ISD::INTRINSIC_VOID:
 772   case ISD::INTRINSIC_W_CHAIN: {
 773     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
 774     switch (IntNo) {
 775     default:
 776       break;
 777
 778     case Intrinsic::arm_neon_vld1: {
 779       static const uint16_t Opcodes[] = { AArch64::LD1_8B,  AArch64::LD1_4H,
 780                                           AArch64::LD1_2S,  AArch64::LD1_1D,
 781                                           AArch64::LD1_16B, AArch64::LD1_8H,
 782                                           AArch64::LD1_4S,  AArch64::LD1_2D };
 783       return SelectVLD(Node, 1, Opcodes);
 784     }
 785     case Intrinsic::arm_neon_vld2: {
 786       static const uint16_t Opcodes[] = { AArch64::LD2_8B,  AArch64::LD2_4H,
 787                                           AArch64::LD2_2S,  AArch64::LD1_2V_1D,
 788                                           AArch64::LD2_16B, AArch64::LD2_8H,
 789                                           AArch64::LD2_4S,  AArch64::LD2_2D };
 790       return SelectVLD(Node, 2, Opcodes);
 791     }
 792     case Intrinsic::arm_neon_vld3: {
 793       static const uint16_t Opcodes[] = { AArch64::LD3_8B,  AArch64::LD3_4H,
 794                                           AArch64::LD3_2S,  AArch64::LD1_3V_1D,
 795                                           AArch64::LD3_16B, AArch64::LD3_8H,
 796                                           AArch64::LD3_4S,  AArch64::LD3_2D };
 797       return SelectVLD(Node, 3, Opcodes);
 798     }
 799     case Intrinsic::arm_neon_vld4: {
 800       static const uint16_t Opcodes[] = { AArch64::LD4_8B,  AArch64::LD4_4H,
 801                                           AArch64::LD4_2S,  AArch64::LD1_4V_1D,
 802                                           AArch64::LD4_16B, AArch64::LD4_8H,
 803                                           AArch64::LD4_4S,  AArch64::LD4_2D };
 804       return SelectVLD(Node, 4, Opcodes);
 805     }
 806     case Intrinsic::arm_neon_vst1: {
 807       static const uint16_t Opcodes[] = { AArch64::ST1_8B,  AArch64::ST1_4H,
 808                                           AArch64::ST1_2S,  AArch64::ST1_1D,
 809                                           AArch64::ST1_16B, AArch64::ST1_8H,
 810                                           AArch64::ST1_4S,  AArch64::ST1_2D };
 811       return SelectVST(Node, 1, Opcodes);
 812     }
 813     case Intrinsic::arm_neon_vst2: {
 814       static const uint16_t Opcodes[] = { AArch64::ST2_8B,  AArch64::ST2_4H,
 815                                           AArch64::ST2_2S,  AArch64::ST1_2V_1D,
 816                                           AArch64::ST2_16B, AArch64::ST2_8H,
 817                                           AArch64::ST2_4S,  AArch64::ST2_2D };
 818       return SelectVST(Node, 2, Opcodes);
 819     }
 820     case Intrinsic::arm_neon_vst3: {
 821       static const uint16_t Opcodes[] = { AArch64::ST3_8B,  AArch64::ST3_4H,
 822                                           AArch64::ST3_2S,  AArch64::ST1_3V_1D,
 823                                           AArch64::ST3_16B, AArch64::ST3_8H,
 824                                           AArch64::ST3_4S,  AArch64::ST3_2D };
 825       return SelectVST(Node, 3, Opcodes);
 826     }
 827     case Intrinsic::arm_neon_vst4: {
 828       static const uint16_t Opcodes[] = { AArch64::ST4_8B,  AArch64::ST4_4H,
 829                                           AArch64::ST4_2S,  AArch64::ST1_4V_1D,
 830                                           AArch64::ST4_16B, AArch64::ST4_8H,
 831                                           AArch64::ST4_4S,  AArch64::ST4_2D };
 832       return SelectVST(Node, 4, Opcodes);
 833     }
 834     }
 835     break;
 836   }
 837   default:
 838     break; // Let generic code handle it
 839   }
 840
 841   SDNode *ResNode = SelectCode(Node);
 842
 843   DEBUG(dbgs() << "=> ";
 844         if (ResNode == NULL || ResNode == Node)
 845           Node->dump(CurDAG);
 846         else
 847           ResNode->dump(CurDAG);
 848         dbgs() << "\n");
 849
 850   return ResNode;
 851 }
 852
 853 /// This pass converts a legalized DAG into a AArch64-specific DAG, ready for
 854 /// instruction scheduling.
 855 FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
 856                                          CodeGenOpt::Level OptLevel) {
 857   return new AArch64DAGToDAGISel(TM, OptLevel);
 858 }