lib/Target/AArch64/AArch64FastISel.cpp

//===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "MCTargetDesc/AArch64AddressingModes.h"
  20 #include "llvm/Analysis/BranchProbabilityInfo.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/FastISel.h"
  23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  24 #include "llvm/CodeGen/MachineConstantPool.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/IR/CallingConv.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/DerivedTypes.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/IR/GetElementPtrTypeIterator.h"
  33 #include "llvm/IR/GlobalAlias.h"
  34 #include "llvm/IR/GlobalVariable.h"
  35 #include "llvm/IR/Instructions.h"
  36 #include "llvm/IR/IntrinsicInst.h"
  37 #include "llvm/IR/Operator.h"
  38 #include "llvm/Support/CommandLine.h"
  39 using namespace llvm;
  40
  41 namespace {
  42
  43 class AArch64FastISel final : public FastISel {
  44   class Address {
  45   public:
  46     typedef enum {
  47       RegBase,
  48       FrameIndexBase
  49     } BaseKind;
  50
  51   private:
  52     BaseKind Kind;
  53     AArch64_AM::ShiftExtendType ExtType;
  54     union {
  55       unsigned Reg;
  56       int FI;
  57     } Base;
  58     unsigned OffsetReg;
  59     unsigned Shift;
  60     int64_t Offset;
  61     const GlobalValue *GV;
  62
  63   public:
  64     Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
  65       OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
  66     void setKind(BaseKind K) { Kind = K; }
  67     BaseKind getKind() const { return Kind; }
  68     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
  69     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
  70     bool isRegBase() const { return Kind == RegBase; }
  71     bool isFIBase() const { return Kind == FrameIndexBase; }
  72     void setReg(unsigned Reg) {
  73       assert(isRegBase() && "Invalid base register access!");
  74       Base.Reg = Reg;
  75     }
  76     unsigned getReg() const {
  77       assert(isRegBase() && "Invalid base register access!");
  78       return Base.Reg;
  79     }
  80     void setOffsetReg(unsigned Reg) {
  81       assert(isRegBase() && "Invalid offset register access!");
  82       OffsetReg = Reg;
  83     }
  84     unsigned getOffsetReg() const {
  85       assert(isRegBase() && "Invalid offset register access!");
  86       return OffsetReg;
  87     }
  88     void setFI(unsigned FI) {
  89       assert(isFIBase() && "Invalid base frame index  access!");
  90       Base.FI = FI;
  91     }
  92     unsigned getFI() const {
  93       assert(isFIBase() && "Invalid base frame index access!");
  94       return Base.FI;
  95     }
  96     void setOffset(int64_t O) { Offset = O; }
  97     int64_t getOffset() { return Offset; }
  98     void setShift(unsigned S) { Shift = S; }
  99     unsigned getShift() { return Shift; }
 100
 101     void setGlobalValue(const GlobalValue *G) { GV = G; }
 102     const GlobalValue *getGlobalValue() { return GV; }
 103   };
 104
 105   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
 106   /// make the right decision when generating code for different targets.
 107   const AArch64Subtarget *Subtarget;
 108   LLVMContext *Context;
 109
 110   bool fastLowerArguments() override;
 111   bool fastLowerCall(CallLoweringInfo &CLI) override;
 112   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
 113
 114 private:
 115   // Selection routines.
 116   bool selectAddSub(const Instruction *I);
 117   bool selectLogicalOp(const Instruction *I);
 118   bool selectLoad(const Instruction *I);
 119   bool selectStore(const Instruction *I);
 120   bool selectBranch(const Instruction *I);
 121   bool selectIndirectBr(const Instruction *I);
 122   bool selectCmp(const Instruction *I);
 123   bool selectSelect(const Instruction *I);
 124   bool selectFPExt(const Instruction *I);
 125   bool selectFPTrunc(const Instruction *I);
 126   bool selectFPToInt(const Instruction *I, bool Signed);
 127   bool selectIntToFP(const Instruction *I, bool Signed);
 128   bool selectRem(const Instruction *I, unsigned ISDOpcode);
 129   bool selectRet(const Instruction *I);
 130   bool selectTrunc(const Instruction *I);
 131   bool selectIntExt(const Instruction *I);
 132   bool selectMul(const Instruction *I);
 133   bool selectShift(const Instruction *I);
 134   bool selectBitCast(const Instruction *I);
 135   bool selectFRem(const Instruction *I);
 136   bool selectSDiv(const Instruction *I);
 137
 138   // Utility helper routines.
 139   bool isTypeLegal(Type *Ty, MVT &VT);
 140   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
 141   bool isValueAvailable(const Value *V) const;
 142   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
 143   bool computeCallAddress(const Value *V, Address &Addr);
 144   bool simplifyAddress(Address &Addr, MVT VT);
 145   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 146                             unsigned Flags, unsigned ScaleFactor,
 147                             MachineMemOperand *MMO);
 148   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
 149   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 150                           unsigned Alignment);
 151   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 152                          const Value *Cond);
 153
 154   // Emit helper routines.
 155   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
 156                       const Value *RHS, bool SetFlags = false,
 157                       bool WantResult = true,  bool IsZExt = false);
 158   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
 159                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 160                          bool SetFlags = false, bool WantResult = true);
 161   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
 162                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
 163                          bool WantResult = true);
 164   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
 165                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 166                          AArch64_AM::ShiftExtendType ShiftType,
 167                          uint64_t ShiftImm, bool SetFlags = false,
 168                          bool WantResult = true);
 169   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
 170                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 171                           AArch64_AM::ShiftExtendType ExtType,
 172                           uint64_t ShiftImm, bool SetFlags = false,
 173                          bool WantResult = true);
 174
 175   // Emit functions.
 176   bool emitCompareAndBranch(const BranchInst *BI);
 177   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
 178   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
 179   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 180   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
 181   bool emitLoad(MVT VT, MVT ResultVT, unsigned &ResultReg, Address Addr,
 182                 bool WantZExt = true, MachineMemOperand *MMO = nullptr);
 183   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
 184                  MachineMemOperand *MMO = nullptr);
 185   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 186   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 187   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
 188                    bool SetFlags = false, bool WantResult = true,
 189                    bool IsZExt = false);
 190   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
 191                    bool SetFlags = false, bool WantResult = true,
 192                    bool IsZExt = false);
 193   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 194                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
 195   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 196                        unsigned RHSReg, bool RHSIsKill,
 197                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
 198                        bool WantResult = true);
 199   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
 200                          const Value *RHS);
 201   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 202                             bool LHSIsKill, uint64_t Imm);
 203   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
 204                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 205                             uint64_t ShiftImm);
 206   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 207   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 208                       unsigned Op1, bool Op1IsKill);
 209   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 210                         unsigned Op1, bool Op1IsKill);
 211   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 212                         unsigned Op1, bool Op1IsKill);
 213   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 214                       unsigned Op1Reg, bool Op1IsKill);
 215   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 216                       uint64_t Imm, bool IsZExt = true);
 217   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 218                       unsigned Op1Reg, bool Op1IsKill);
 219   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 220                       uint64_t Imm, bool IsZExt = true);
 221   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 222                       unsigned Op1Reg, bool Op1IsKill);
 223   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
 224                       uint64_t Imm, bool IsZExt = false);
 225
 226   unsigned materializeInt(const ConstantInt *CI, MVT VT);
 227   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
 228   unsigned materializeGV(const GlobalValue *GV);
 229
 230   // Call handling routines.
 231 private:
 232   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 233   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 234                        unsigned &NumBytes);
 235   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 236
 237 public:
 238   // Backend specific FastISel code.
 239   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
 240   unsigned fastMaterializeConstant(const Constant *C) override;
 241   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
 242
 243   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
 244                          const TargetLibraryInfo *LibInfo)
 245       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
 246     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
 247     Context = &FuncInfo.Fn->getContext();
 248   }
 249
 250   bool fastSelectInstruction(const Instruction *I) override;
 251
 252 #include "AArch64GenFastISel.inc"
 253 };
 254
 255 } // end anonymous namespace
 256
 257 #include "AArch64GenCallingConv.inc"
 258
 259 /// \brief Check if the sign-/zero-extend will be a noop.
 260 static bool isIntExtFree(const Instruction *I) {
 261   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
 262          "Unexpected integer extend instruction.");
 263   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
 264          "Unexpected value type.");
 265   bool IsZExt = isa<ZExtInst>(I);
 266
 267   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
 268     if (LI->hasOneUse())
 269       return true;
 270
 271   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
 272     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
 273       return true;
 274
 275   return false;
 276 }
 277
 278 /// \brief Determine the implicit scale factor that is applied by a memory
 279 /// operation for a given value type.
 280 static unsigned getImplicitScaleFactor(MVT VT) {
 281   switch (VT.SimpleTy) {
 282   default:
 283     return 0;    // invalid
 284   case MVT::i1:  // fall-through
 285   case MVT::i8:
 286     return 1;
 287   case MVT::i16:
 288     return 2;
 289   case MVT::i32: // fall-through
 290   case MVT::f32:
 291     return 4;
 292   case MVT::i64: // fall-through
 293   case MVT::f64:
 294     return 8;
 295   }
 296 }
 297
 298 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 299   if (CC == CallingConv::WebKit_JS)
 300     return CC_AArch64_WebKit_JS;
 301   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 302 }
 303
 304 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
 305   assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
 306          "Alloca should always return a pointer.");
 307
 308   // Don't handle dynamic allocas.
 309   if (!FuncInfo.StaticAllocaMap.count(AI))
 310     return 0;
 311
 312   DenseMap<const AllocaInst *, int>::iterator SI =
 313       FuncInfo.StaticAllocaMap.find(AI);
 314
 315   if (SI != FuncInfo.StaticAllocaMap.end()) {
 316     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 317     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 318             ResultReg)
 319         .addFrameIndex(SI->second)
 320         .addImm(0)
 321         .addImm(0);
 322     return ResultReg;
 323   }
 324
 325   return 0;
 326 }
 327
 328 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
 329   if (VT > MVT::i64)
 330     return 0;
 331
 332   if (!CI->isZero())
 333     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 334
 335   // Create a copy from the zero register to materialize a "0" value.
 336   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
 337                                                    : &AArch64::GPR32RegClass;
 338   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 339   unsigned ResultReg = createResultReg(RC);
 340   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
 341           ResultReg).addReg(ZeroReg, getKillRegState(true));
 342   return ResultReg;
 343 }
 344
 345 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
 346   // Positive zero (+0.0) has to be materialized with a fmov from the zero
 347   // register, because the immediate version of fmov cannot encode zero.
 348   if (CFP->isNullValue())
 349     return fastMaterializeFloatZero(CFP);
 350
 351   if (VT != MVT::f32 && VT != MVT::f64)
 352     return 0;
 353
 354   const APFloat Val = CFP->getValueAPF();
 355   bool Is64Bit = (VT == MVT::f64);
 356   // This checks to see if we can use FMOV instructions to materialize
 357   // a constant, otherwise we have to materialize via the constant pool.
 358   if (TLI.isFPImmLegal(Val, VT)) {
 359     int Imm =
 360         Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
 361     assert((Imm != -1) && "Cannot encode floating-point constant.");
 362     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
 363     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
 364   }
 365
 366   // Materialize via constant pool.  MachineConstantPool wants an explicit
 367   // alignment.
 368   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 369   if (Align == 0)
 370     Align = DL.getTypeAllocSize(CFP->getType());
 371
 372   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 373   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 374   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 375           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 376
 377   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
 378   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 379   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 380       .addReg(ADRPReg)
 381       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 382   return ResultReg;
 383 }
 384
 385 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
 386   // We can't handle thread-local variables quickly yet.
 387   if (GV->isThreadLocal())
 388     return 0;
 389
 390   // MachO still uses GOT for large code-model accesses, but ELF requires
 391   // movz/movk sequences, which FastISel doesn't handle yet.
 392   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
 393     return 0;
 394
 395   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 396
 397   EVT DestEVT = TLI.getValueType(GV->getType(), true);
 398   if (!DestEVT.isSimple())
 399     return 0;
 400
 401   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 402   unsigned ResultReg;
 403
 404   if (OpFlags & AArch64II::MO_GOT) {
 405     // ADRP + LDRX
 406     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 407             ADRPReg)
 408       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 409
 410     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 411     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 412             ResultReg)
 413       .addReg(ADRPReg)
 414       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 415                         AArch64II::MO_NC);
 416   } else if (OpFlags & AArch64II::MO_CONSTPOOL) {
 417     // We can't handle addresses loaded from a constant pool quickly yet.
 418     return 0;
 419   } else {
 420     // ADRP + ADDX
 421     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 422             ADRPReg)
 423       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 424
 425     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 426     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 427             ResultReg)
 428       .addReg(ADRPReg)
 429       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 430       .addImm(0);
 431   }
 432   return ResultReg;
 433 }
 434
 435 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
 436   EVT CEVT = TLI.getValueType(C->getType(), true);
 437
 438   // Only handle simple types.
 439   if (!CEVT.isSimple())
 440     return 0;
 441   MVT VT = CEVT.getSimpleVT();
 442
 443   if (const auto *CI = dyn_cast<ConstantInt>(C))
 444     return materializeInt(CI, VT);
 445   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 446     return materializeFP(CFP, VT);
 447   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 448     return materializeGV(GV);
 449
 450   return 0;
 451 }
 452
 453 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
 454   assert(CFP->isNullValue() &&
 455          "Floating-point constant is not a positive zero.");
 456   MVT VT;
 457   if (!isTypeLegal(CFP->getType(), VT))
 458     return 0;
 459
 460   if (VT != MVT::f32 && VT != MVT::f64)
 461     return 0;
 462
 463   bool Is64Bit = (VT == MVT::f64);
 464   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
 465   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
 466   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
 467 }
 468
 469 /// \brief Check if the multiply is by a power-of-2 constant.
 470 static bool isMulPowOf2(const Value *I) {
 471   if (const auto *MI = dyn_cast<MulOperator>(I)) {
 472     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
 473       if (C->getValue().isPowerOf2())
 474         return true;
 475     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
 476       if (C->getValue().isPowerOf2())
 477         return true;
 478   }
 479   return false;
 480 }
 481
 482 // Computes the address to get to an object.
 483 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
 484 {
 485   const User *U = nullptr;
 486   unsigned Opcode = Instruction::UserOp1;
 487   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 488     // Don't walk into other basic blocks unless the object is an alloca from
 489     // another block, otherwise it may not have a virtual register assigned.
 490     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 491         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 492       Opcode = I->getOpcode();
 493       U = I;
 494     }
 495   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 496     Opcode = C->getOpcode();
 497     U = C;
 498   }
 499
 500   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
 501     if (Ty->getAddressSpace() > 255)
 502       // Fast instruction selection doesn't support the special
 503       // address spaces.
 504       return false;
 505
 506   switch (Opcode) {
 507   default:
 508     break;
 509   case Instruction::BitCast: {
 510     // Look through bitcasts.
 511     return computeAddress(U->getOperand(0), Addr, Ty);
 512   }
 513   case Instruction::IntToPtr: {
 514     // Look past no-op inttoptrs.
 515     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 516       return computeAddress(U->getOperand(0), Addr, Ty);
 517     break;
 518   }
 519   case Instruction::PtrToInt: {
 520     // Look past no-op ptrtoints.
 521     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
 522       return computeAddress(U->getOperand(0), Addr, Ty);
 523     break;
 524   }
 525   case Instruction::GetElementPtr: {
 526     Address SavedAddr = Addr;
 527     uint64_t TmpOffset = Addr.getOffset();
 528
 529     // Iterate through the GEP folding the constants into offsets where
 530     // we can.
 531     gep_type_iterator GTI = gep_type_begin(U);
 532     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
 533          ++i, ++GTI) {
 534       const Value *Op = *i;
 535       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
 536         const StructLayout *SL = DL.getStructLayout(STy);
 537         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 538         TmpOffset += SL->getElementOffset(Idx);
 539       } else {
 540         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 541         for (;;) {
 542           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 543             // Constant-offset addressing.
 544             TmpOffset += CI->getSExtValue() * S;
 545             break;
 546           }
 547           if (canFoldAddIntoGEP(U, Op)) {
 548             // A compatible add with a constant operand. Fold the constant.
 549             ConstantInt *CI =
 550                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 551             TmpOffset += CI->getSExtValue() * S;
 552             // Iterate on the other operand.
 553             Op = cast<AddOperator>(Op)->getOperand(0);
 554             continue;
 555           }
 556           // Unsupported
 557           goto unsupported_gep;
 558         }
 559       }
 560     }
 561
 562     // Try to grab the base operand now.
 563     Addr.setOffset(TmpOffset);
 564     if (computeAddress(U->getOperand(0), Addr, Ty))
 565       return true;
 566
 567     // We failed, restore everything and try the other options.
 568     Addr = SavedAddr;
 569
 570   unsupported_gep:
 571     break;
 572   }
 573   case Instruction::Alloca: {
 574     const AllocaInst *AI = cast<AllocaInst>(Obj);
 575     DenseMap<const AllocaInst *, int>::iterator SI =
 576         FuncInfo.StaticAllocaMap.find(AI);
 577     if (SI != FuncInfo.StaticAllocaMap.end()) {
 578       Addr.setKind(Address::FrameIndexBase);
 579       Addr.setFI(SI->second);
 580       return true;
 581     }
 582     break;
 583   }
 584   case Instruction::Add: {
 585     // Adds of constants are common and easy enough.
 586     const Value *LHS = U->getOperand(0);
 587     const Value *RHS = U->getOperand(1);
 588
 589     if (isa<ConstantInt>(LHS))
 590       std::swap(LHS, RHS);
 591
 592     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 593       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
 594       return computeAddress(LHS, Addr, Ty);
 595     }
 596
 597     Address Backup = Addr;
 598     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
 599       return true;
 600     Addr = Backup;
 601
 602     break;
 603   }
 604   case Instruction::Sub: {
 605     // Subs of constants are common and easy enough.
 606     const Value *LHS = U->getOperand(0);
 607     const Value *RHS = U->getOperand(1);
 608
 609     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 610       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
 611       return computeAddress(LHS, Addr, Ty);
 612     }
 613     break;
 614   }
 615   case Instruction::Shl: {
 616     if (Addr.getOffsetReg())
 617       break;
 618
 619     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
 620     if (!CI)
 621       break;
 622
 623     unsigned Val = CI->getZExtValue();
 624     if (Val < 1 || Val > 3)
 625       break;
 626
 627     uint64_t NumBytes = 0;
 628     if (Ty && Ty->isSized()) {
 629       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 630       NumBytes = NumBits / 8;
 631       if (!isPowerOf2_64(NumBits))
 632         NumBytes = 0;
 633     }
 634
 635     if (NumBytes != (1ULL << Val))
 636       break;
 637
 638     Addr.setShift(Val);
 639     Addr.setExtendType(AArch64_AM::LSL);
 640
 641     const Value *Src = U->getOperand(0);
 642     if (const auto *I = dyn_cast<Instruction>(Src))
 643       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 644         Src = I;
 645
 646     // Fold the zext or sext when it won't become a noop.
 647     if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
 648       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 649           Addr.setExtendType(AArch64_AM::UXTW);
 650           Src = ZE->getOperand(0);
 651       }
 652     } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
 653       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
 654         Addr.setExtendType(AArch64_AM::SXTW);
 655         Src = SE->getOperand(0);
 656       }
 657     }
 658
 659     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
 660       if (AI->getOpcode() == Instruction::And) {
 661         const Value *LHS = AI->getOperand(0);
 662         const Value *RHS = AI->getOperand(1);
 663
 664         if (const auto *C = dyn_cast<ConstantInt>(LHS))
 665           if (C->getValue() == 0xffffffff)
 666             std::swap(LHS, RHS);
 667
 668         if (const auto *C = dyn_cast<ConstantInt>(RHS))
 669           if (C->getValue() == 0xffffffff) {
 670             Addr.setExtendType(AArch64_AM::UXTW);
 671             unsigned Reg = getRegForValue(LHS);
 672             if (!Reg)
 673               return false;
 674             bool RegIsKill = hasTrivialKill(LHS);
 675             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 676                                              AArch64::sub_32);
 677             Addr.setOffsetReg(Reg);
 678             return true;
 679           }
 680       }
 681
 682     unsigned Reg = getRegForValue(Src);
 683     if (!Reg)
 684       return false;
 685     Addr.setOffsetReg(Reg);
 686     return true;
 687   }
 688   case Instruction::Mul: {
 689     if (Addr.getOffsetReg())
 690       break;
 691
 692     if (!isMulPowOf2(U))
 693       break;
 694
 695     const Value *LHS = U->getOperand(0);
 696     const Value *RHS = U->getOperand(1);
 697
 698     // Canonicalize power-of-2 value to the RHS.
 699     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 700       if (C->getValue().isPowerOf2())
 701         std::swap(LHS, RHS);
 702
 703     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
 704     const auto *C = cast<ConstantInt>(RHS);
 705     unsigned Val = C->getValue().logBase2();
 706     if (Val < 1 || Val > 3)
 707       break;
 708
 709     uint64_t NumBytes = 0;
 710     if (Ty && Ty->isSized()) {
 711       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 712       NumBytes = NumBits / 8;
 713       if (!isPowerOf2_64(NumBits))
 714         NumBytes = 0;
 715     }
 716
 717     if (NumBytes != (1ULL << Val))
 718       break;
 719
 720     Addr.setShift(Val);
 721     Addr.setExtendType(AArch64_AM::LSL);
 722
 723     const Value *Src = LHS;
 724     if (const auto *I = dyn_cast<Instruction>(Src))
 725       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 726         Src = I;
 727
 728
 729     // Fold the zext or sext when it won't become a noop.
 730     if (const auto *ZE = dyn_cast<ZExtInst>(Src)) {
 731       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
 732         Addr.setExtendType(AArch64_AM::UXTW);
 733         Src = ZE->getOperand(0);
 734       }
 735     } else if (const auto *SE = dyn_cast<SExtInst>(Src)) {
 736       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
 737         Addr.setExtendType(AArch64_AM::SXTW);
 738         Src = SE->getOperand(0);
 739       }
 740     }
 741
 742     unsigned Reg = getRegForValue(Src);
 743     if (!Reg)
 744       return false;
 745     Addr.setOffsetReg(Reg);
 746     return true;
 747   }
 748   case Instruction::And: {
 749     if (Addr.getOffsetReg())
 750       break;
 751
 752     if (DL.getTypeSizeInBits(Ty) != 8)
 753       break;
 754
 755     const Value *LHS = U->getOperand(0);
 756     const Value *RHS = U->getOperand(1);
 757
 758     if (const auto *C = dyn_cast<ConstantInt>(LHS))
 759       if (C->getValue() == 0xffffffff)
 760         std::swap(LHS, RHS);
 761
 762     if (const auto *C = dyn_cast<ConstantInt>(RHS))
 763       if (C->getValue() == 0xffffffff) {
 764         Addr.setShift(0);
 765         Addr.setExtendType(AArch64_AM::LSL);
 766         Addr.setExtendType(AArch64_AM::UXTW);
 767
 768         unsigned Reg = getRegForValue(LHS);
 769         if (!Reg)
 770           return false;
 771         bool RegIsKill = hasTrivialKill(LHS);
 772         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
 773                                          AArch64::sub_32);
 774         Addr.setOffsetReg(Reg);
 775         return true;
 776       }
 777     break;
 778   }
 779   } // end switch
 780
 781   if (Addr.getReg()) {
 782     if (!Addr.getOffsetReg()) {
 783       unsigned Reg = getRegForValue(Obj);
 784       if (!Reg)
 785         return false;
 786       Addr.setOffsetReg(Reg);
 787       return true;
 788     }
 789     return false;
 790   }
 791
 792   unsigned Reg = getRegForValue(Obj);
 793   if (!Reg)
 794     return false;
 795   Addr.setReg(Reg);
 796   return true;
 797 }
 798
 799 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
 800   const User *U = nullptr;
 801   unsigned Opcode = Instruction::UserOp1;
 802   bool InMBB = true;
 803
 804   if (const auto *I = dyn_cast<Instruction>(V)) {
 805     Opcode = I->getOpcode();
 806     U = I;
 807     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 808   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 809     Opcode = C->getOpcode();
 810     U = C;
 811   }
 812
 813   switch (Opcode) {
 814   default: break;
 815   case Instruction::BitCast:
 816     // Look past bitcasts if its operand is in the same BB.
 817     if (InMBB)
 818       return computeCallAddress(U->getOperand(0), Addr);
 819     break;
 820   case Instruction::IntToPtr:
 821     // Look past no-op inttoptrs if its operand is in the same BB.
 822     if (InMBB &&
 823         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 824       return computeCallAddress(U->getOperand(0), Addr);
 825     break;
 826   case Instruction::PtrToInt:
 827     // Look past no-op ptrtoints if its operand is in the same BB.
 828     if (InMBB &&
 829         TLI.getValueType(U->getType()) == TLI.getPointerTy())
 830       return computeCallAddress(U->getOperand(0), Addr);
 831     break;
 832   }
 833
 834   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 835     Addr.setGlobalValue(GV);
 836     return true;
 837   }
 838
 839   // If all else fails, try to materialize the value in a register.
 840   if (!Addr.getGlobalValue()) {
 841     Addr.setReg(getRegForValue(V));
 842     return Addr.getReg() != 0;
 843   }
 844
 845   return false;
 846 }
 847
 848
 849 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 850   EVT evt = TLI.getValueType(Ty, true);
 851
 852   // Only handle simple types.
 853   if (evt == MVT::Other || !evt.isSimple())
 854     return false;
 855   VT = evt.getSimpleVT();
 856
 857   // This is a legal type, but it's not something we handle in fast-isel.
 858   if (VT == MVT::f128)
 859     return false;
 860
 861   // Handle all other legal types, i.e. a register that will directly hold this
 862   // value.
 863   return TLI.isTypeLegal(VT);
 864 }
 865
 866 /// \brief Determine if the value type is supported by FastISel.
 867 ///
 868 /// FastISel for AArch64 can handle more value types than are legal. This adds
 869 /// simple value type such as i1, i8, and i16.
 870 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
 871   if (Ty->isVectorTy() && !IsVectorAllowed)
 872     return false;
 873
 874   if (isTypeLegal(Ty, VT))
 875     return true;
 876
 877   // If this is a type than can be sign or zero-extended to a basic operation
 878   // go ahead and accept it now.
 879   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 880     return true;
 881
 882   return false;
 883 }
 884
 885 bool AArch64FastISel::isValueAvailable(const Value *V) const {
 886   if (!isa<Instruction>(V))
 887     return true;
 888
 889   const auto *I = cast<Instruction>(V);
 890   if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 891     return true;
 892
 893   return false;
 894 }
 895
 896 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
 897   unsigned ScaleFactor = getImplicitScaleFactor(VT);
 898   if (!ScaleFactor)
 899     return false;
 900
 901   bool ImmediateOffsetNeedsLowering = false;
 902   bool RegisterOffsetNeedsLowering = false;
 903   int64_t Offset = Addr.getOffset();
 904   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
 905     ImmediateOffsetNeedsLowering = true;
 906   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
 907            !isUInt<12>(Offset / ScaleFactor))
 908     ImmediateOffsetNeedsLowering = true;
 909
 910   // Cannot encode an offset register and an immediate offset in the same
 911   // instruction. Fold the immediate offset into the load/store instruction and
 912   // emit an additonal add to take care of the offset register.
 913   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
 914       Addr.getOffsetReg())
 915     RegisterOffsetNeedsLowering = true;
 916
 917   // Cannot encode zero register as base.
 918   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
 919     RegisterOffsetNeedsLowering = true;
 920
 921   // If this is a stack pointer and the offset needs to be simplified then put
 922   // the alloca address into a register, set the base type back to register and
 923   // continue. This should almost never happen.
 924   if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
 925     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 926     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 927             ResultReg)
 928       .addFrameIndex(Addr.getFI())
 929       .addImm(0)
 930       .addImm(0);
 931     Addr.setKind(Address::RegBase);
 932     Addr.setReg(ResultReg);
 933   }
 934
 935   if (RegisterOffsetNeedsLowering) {
 936     unsigned ResultReg = 0;
 937     if (Addr.getReg()) {
 938       if (Addr.getExtendType() == AArch64_AM::SXTW ||
 939           Addr.getExtendType() == AArch64_AM::UXTW   )
 940         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 941                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
 942                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
 943                                   Addr.getShift());
 944       else
 945         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 946                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
 947                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
 948                                   Addr.getShift());
 949     } else {
 950       if (Addr.getExtendType() == AArch64_AM::UXTW)
 951         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
 952                                /*Op0IsKill=*/false, Addr.getShift(),
 953                                /*IsZExt=*/true);
 954       else if (Addr.getExtendType() == AArch64_AM::SXTW)
 955         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
 956                                /*Op0IsKill=*/false, Addr.getShift(),
 957                                /*IsZExt=*/false);
 958       else
 959         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
 960                                /*Op0IsKill=*/false, Addr.getShift());
 961     }
 962     if (!ResultReg)
 963       return false;
 964
 965     Addr.setReg(ResultReg);
 966     Addr.setOffsetReg(0);
 967     Addr.setShift(0);
 968     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
 969   }
 970
 971   // Since the offset is too large for the load/store instruction get the
 972   // reg+offset into a register.
 973   if (ImmediateOffsetNeedsLowering) {
 974     unsigned ResultReg;
 975     if (Addr.getReg()) {
 976       // Try to fold the immediate into the add instruction.
 977       if (Offset < 0)
 978         ResultReg = emitAddSub_ri(/*UseAdd=*/false, MVT::i64, Addr.getReg(),
 979                                   /*IsKill=*/false, -Offset);
 980       else
 981         ResultReg = emitAddSub_ri(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 982                                   /*IsKill=*/false, Offset);
 983       if (!ResultReg) {
 984         unsigned ImmReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
 985         ResultReg = emitAddSub_rr(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
 986                                   /*IsKill=*/false, ImmReg, /*IsKill=*/true);
 987       }
 988     } else
 989       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
 990
 991     if (!ResultReg)
 992       return false;
 993     Addr.setReg(ResultReg);
 994     Addr.setOffset(0);
 995   }
 996   return true;
 997 }
 998
 999 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1000                                            const MachineInstrBuilder &MIB,
1001                                            unsigned Flags,
1002                                            unsigned ScaleFactor,
1003                                            MachineMemOperand *MMO) {
1004   int64_t Offset = Addr.getOffset() / ScaleFactor;
1005   // Frame base works a bit differently. Handle it separately.
1006   if (Addr.isFIBase()) {
1007     int FI = Addr.getFI();
1008     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1009     // and alignment should be based on the VT.
1010     MMO = FuncInfo.MF->getMachineMemOperand(
1011       MachinePointerInfo::getFixedStack(FI, Offset), Flags,
1012       MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1013     // Now add the rest of the operands.
1014     MIB.addFrameIndex(FI).addImm(Offset);
1015   } else {
1016     assert(Addr.isRegBase() && "Unexpected address kind.");
1017     const MCInstrDesc &II = MIB->getDesc();
1018     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1019     Addr.setReg(
1020       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1021     Addr.setOffsetReg(
1022       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1023     if (Addr.getOffsetReg()) {
1024       assert(Addr.getOffset() == 0 && "Unexpected offset");
1025       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1026                       Addr.getExtendType() == AArch64_AM::SXTX;
1027       MIB.addReg(Addr.getReg());
1028       MIB.addReg(Addr.getOffsetReg());
1029       MIB.addImm(IsSigned);
1030       MIB.addImm(Addr.getShift() != 0);
1031     } else {
1032       MIB.addReg(Addr.getReg());
1033       MIB.addImm(Offset);
1034     }
1035   }
1036
1037   if (MMO)
1038     MIB.addMemOperand(MMO);
1039 }
1040
1041 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1042                                      const Value *RHS, bool SetFlags,
1043                                      bool WantResult,  bool IsZExt) {
1044   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1045   bool NeedExtend = false;
1046   switch (RetVT.SimpleTy) {
1047   default:
1048     return 0;
1049   case MVT::i1:
1050     NeedExtend = true;
1051     break;
1052   case MVT::i8:
1053     NeedExtend = true;
1054     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1055     break;
1056   case MVT::i16:
1057     NeedExtend = true;
1058     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1059     break;
1060   case MVT::i32:  // fall-through
1061   case MVT::i64:
1062     break;
1063   }
1064   MVT SrcVT = RetVT;
1065   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1066
1067   // Canonicalize immediates to the RHS first.
1068   if (UseAdd && isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1069     std::swap(LHS, RHS);
1070
1071   // Canonicalize mul by power of 2 to the RHS.
1072   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1073     if (isMulPowOf2(LHS))
1074       std::swap(LHS, RHS);
1075
1076   // Canonicalize shift immediate to the RHS.
1077   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1078     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1079       if (isa<ConstantInt>(SI->getOperand(1)))
1080         if (SI->getOpcode() == Instruction::Shl  ||
1081             SI->getOpcode() == Instruction::LShr ||
1082             SI->getOpcode() == Instruction::AShr   )
1083           std::swap(LHS, RHS);
1084
1085   unsigned LHSReg = getRegForValue(LHS);
1086   if (!LHSReg)
1087     return 0;
1088   bool LHSIsKill = hasTrivialKill(LHS);
1089
1090   if (NeedExtend)
1091     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1092
1093   unsigned ResultReg = 0;
1094   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1095     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1096     if (C->isNegative())
1097       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1098                                 SetFlags, WantResult);
1099     else
1100       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1101                                 WantResult);
1102   }
1103   if (ResultReg)
1104     return ResultReg;
1105
1106   // Only extend the RHS within the instruction if there is a valid extend type.
1107   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1108       isValueAvailable(RHS)) {
1109     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1110       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1111         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1112           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1113           if (!RHSReg)
1114             return 0;
1115           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1116           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1117                                RHSIsKill, ExtendType, C->getZExtValue(),
1118                                SetFlags, WantResult);
1119         }
1120     unsigned RHSReg = getRegForValue(RHS);
1121     if (!RHSReg)
1122       return 0;
1123     bool RHSIsKill = hasTrivialKill(RHS);
1124     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1125                          ExtendType, 0, SetFlags, WantResult);
1126   }
1127
1128   // Check if the mul can be folded into the instruction.
1129   if (RHS->hasOneUse() && isValueAvailable(RHS))
1130     if (isMulPowOf2(RHS)) {
1131       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1132       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1133
1134       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1135         if (C->getValue().isPowerOf2())
1136           std::swap(MulLHS, MulRHS);
1137
1138       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1139       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1140       unsigned RHSReg = getRegForValue(MulLHS);
1141       if (!RHSReg)
1142         return 0;
1143       bool RHSIsKill = hasTrivialKill(MulLHS);
1144       return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1145                            AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);
1146     }
1147
1148   // Check if the shift can be folded into the instruction.
1149   if (RHS->hasOneUse() && isValueAvailable(RHS))
1150     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1151       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1152         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1153         switch (SI->getOpcode()) {
1154         default: break;
1155         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1156         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1157         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1158         }
1159         uint64_t ShiftVal = C->getZExtValue();
1160         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1161           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1162           if (!RHSReg)
1163             return 0;
1164           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1165           return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1166                                RHSIsKill, ShiftType, ShiftVal, SetFlags,
1167                                WantResult);
1168         }
1169       }
1170     }
1171
1172   unsigned RHSReg = getRegForValue(RHS);
1173   if (!RHSReg)
1174     return 0;
1175   bool RHSIsKill = hasTrivialKill(RHS);
1176
1177   if (NeedExtend)
1178     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1179
1180   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1181                        SetFlags, WantResult);
1182 }
1183
1184 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1185                                         bool LHSIsKill, unsigned RHSReg,
1186                                         bool RHSIsKill, bool SetFlags,
1187                                         bool WantResult) {
1188   assert(LHSReg && RHSReg && "Invalid register number.");
1189
1190   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1191     return 0;
1192
1193   static const unsigned OpcTable[2][2][2] = {
1194     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1195       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1196     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1197       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1198   };
1199   bool Is64Bit = RetVT == MVT::i64;
1200   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1201   const TargetRegisterClass *RC =
1202       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1203   unsigned ResultReg;
1204   if (WantResult)
1205     ResultReg = createResultReg(RC);
1206   else
1207     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1208
1209   const MCInstrDesc &II = TII.get(Opc);
1210   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1211   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1212   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1213       .addReg(LHSReg, getKillRegState(LHSIsKill))
1214       .addReg(RHSReg, getKillRegState(RHSIsKill));
1215   return ResultReg;
1216 }
1217
1218 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1219                                         bool LHSIsKill, uint64_t Imm,
1220                                         bool SetFlags, bool WantResult) {
1221   assert(LHSReg && "Invalid register number.");
1222
1223   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1224     return 0;
1225
1226   unsigned ShiftImm;
1227   if (isUInt<12>(Imm))
1228     ShiftImm = 0;
1229   else if ((Imm & 0xfff000) == Imm) {
1230     ShiftImm = 12;
1231     Imm >>= 12;
1232   } else
1233     return 0;
1234
1235   static const unsigned OpcTable[2][2][2] = {
1236     { { AArch64::SUBWri,  AArch64::SUBXri  },
1237       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1238     { { AArch64::SUBSWri, AArch64::SUBSXri },
1239       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1240   };
1241   bool Is64Bit = RetVT == MVT::i64;
1242   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1243   const TargetRegisterClass *RC;
1244   if (SetFlags)
1245     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1246   else
1247     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1248   unsigned ResultReg;
1249   if (WantResult)
1250     ResultReg = createResultReg(RC);
1251   else
1252     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1253
1254   const MCInstrDesc &II = TII.get(Opc);
1255   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1256   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1257       .addReg(LHSReg, getKillRegState(LHSIsKill))
1258       .addImm(Imm)
1259       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1260   return ResultReg;
1261 }
1262
1263 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1264                                         bool LHSIsKill, unsigned RHSReg,
1265                                         bool RHSIsKill,
1266                                         AArch64_AM::ShiftExtendType ShiftType,
1267                                         uint64_t ShiftImm, bool SetFlags,
1268                                         bool WantResult) {
1269   assert(LHSReg && RHSReg && "Invalid register number.");
1270
1271   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1272     return 0;
1273
1274   static const unsigned OpcTable[2][2][2] = {
1275     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1276       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1277     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1278       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1279   };
1280   bool Is64Bit = RetVT == MVT::i64;
1281   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1282   const TargetRegisterClass *RC =
1283       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1284   unsigned ResultReg;
1285   if (WantResult)
1286     ResultReg = createResultReg(RC);
1287   else
1288     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1289
1290   const MCInstrDesc &II = TII.get(Opc);
1291   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1292   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1293   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1294       .addReg(LHSReg, getKillRegState(LHSIsKill))
1295       .addReg(RHSReg, getKillRegState(RHSIsKill))
1296       .addImm(getShifterImm(ShiftType, ShiftImm));
1297   return ResultReg;
1298 }
1299
1300 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1301                                         bool LHSIsKill, unsigned RHSReg,
1302                                         bool RHSIsKill,
1303                                         AArch64_AM::ShiftExtendType ExtType,
1304                                         uint64_t ShiftImm, bool SetFlags,
1305                                         bool WantResult) {
1306   assert(LHSReg && RHSReg && "Invalid register number.");
1307
1308   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1309     return 0;
1310
1311   static const unsigned OpcTable[2][2][2] = {
1312     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1313       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1314     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1315       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1316   };
1317   bool Is64Bit = RetVT == MVT::i64;
1318   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1319   const TargetRegisterClass *RC = nullptr;
1320   if (SetFlags)
1321     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1322   else
1323     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1324   unsigned ResultReg;
1325   if (WantResult)
1326     ResultReg = createResultReg(RC);
1327   else
1328     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1329
1330   const MCInstrDesc &II = TII.get(Opc);
1331   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1332   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1333   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1334       .addReg(LHSReg, getKillRegState(LHSIsKill))
1335       .addReg(RHSReg, getKillRegState(RHSIsKill))
1336       .addImm(getArithExtendImm(ExtType, ShiftImm));
1337   return ResultReg;
1338 }
1339
1340 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1341   Type *Ty = LHS->getType();
1342   EVT EVT = TLI.getValueType(Ty, true);
1343   if (!EVT.isSimple())
1344     return false;
1345   MVT VT = EVT.getSimpleVT();
1346
1347   switch (VT.SimpleTy) {
1348   default:
1349     return false;
1350   case MVT::i1:
1351   case MVT::i8:
1352   case MVT::i16:
1353   case MVT::i32:
1354   case MVT::i64:
1355     return emitICmp(VT, LHS, RHS, IsZExt);
1356   case MVT::f32:
1357   case MVT::f64:
1358     return emitFCmp(VT, LHS, RHS);
1359   }
1360 }
1361
1362 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1363                                bool IsZExt) {
1364   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1365                  IsZExt) != 0;
1366 }
1367
1368 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1369                                   uint64_t Imm) {
1370   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1371                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1372 }
1373
1374 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1375   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1376     return false;
1377
1378   // Check to see if the 2nd operand is a constant that we can encode directly
1379   // in the compare.
1380   bool UseImm = false;
1381   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1382     if (CFP->isZero() && !CFP->isNegative())
1383       UseImm = true;
1384
1385   unsigned LHSReg = getRegForValue(LHS);
1386   if (!LHSReg)
1387     return false;
1388   bool LHSIsKill = hasTrivialKill(LHS);
1389
1390   if (UseImm) {
1391     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1392     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1393         .addReg(LHSReg, getKillRegState(LHSIsKill));
1394     return true;
1395   }
1396
1397   unsigned RHSReg = getRegForValue(RHS);
1398   if (!RHSReg)
1399     return false;
1400   bool RHSIsKill = hasTrivialKill(RHS);
1401
1402   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1403   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1404       .addReg(LHSReg, getKillRegState(LHSIsKill))
1405       .addReg(RHSReg, getKillRegState(RHSIsKill));
1406   return true;
1407 }
1408
1409 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1410                                   bool SetFlags, bool WantResult, bool IsZExt) {
1411   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1412                     IsZExt);
1413 }
1414
1415 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1416                                   bool SetFlags, bool WantResult, bool IsZExt) {
1417   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1418                     IsZExt);
1419 }
1420
1421 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1422                                       bool LHSIsKill, unsigned RHSReg,
1423                                       bool RHSIsKill, bool WantResult) {
1424   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1425                        RHSIsKill, /*SetFlags=*/true, WantResult);
1426 }
1427
1428 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1429                                       bool LHSIsKill, unsigned RHSReg,
1430                                       bool RHSIsKill,
1431                                       AArch64_AM::ShiftExtendType ShiftType,
1432                                       uint64_t ShiftImm, bool WantResult) {
1433   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1434                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1435                        WantResult);
1436 }
1437
1438 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1439                                         const Value *LHS, const Value *RHS) {
1440   // Canonicalize immediates to the RHS first.
1441   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1442     std::swap(LHS, RHS);
1443
1444   // Canonicalize mul by power-of-2 to the RHS.
1445   if (LHS->hasOneUse() && isValueAvailable(LHS))
1446     if (isMulPowOf2(LHS))
1447       std::swap(LHS, RHS);
1448
1449   // Canonicalize shift immediate to the RHS.
1450   if (LHS->hasOneUse() && isValueAvailable(LHS))
1451     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1452       if (isa<ConstantInt>(SI->getOperand(1)))
1453         std::swap(LHS, RHS);
1454
1455   unsigned LHSReg = getRegForValue(LHS);
1456   if (!LHSReg)
1457     return 0;
1458   bool LHSIsKill = hasTrivialKill(LHS);
1459
1460   unsigned ResultReg = 0;
1461   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1462     uint64_t Imm = C->getZExtValue();
1463     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1464   }
1465   if (ResultReg)
1466     return ResultReg;
1467
1468   // Check if the mul can be folded into the instruction.
1469   if (RHS->hasOneUse() && isValueAvailable(RHS))
1470     if (isMulPowOf2(RHS)) {
1471       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1472       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1473
1474       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1475         if (C->getValue().isPowerOf2())
1476           std::swap(MulLHS, MulRHS);
1477
1478       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1479       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1480
1481       unsigned RHSReg = getRegForValue(MulLHS);
1482       if (!RHSReg)
1483         return 0;
1484       bool RHSIsKill = hasTrivialKill(MulLHS);
1485       return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1486                               RHSIsKill, ShiftVal);
1487     }
1488
1489   // Check if the shift can be folded into the instruction.
1490   if (RHS->hasOneUse() && isValueAvailable(RHS))
1491     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1492       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1493         uint64_t ShiftVal = C->getZExtValue();
1494         unsigned RHSReg = getRegForValue(SI->getOperand(0));
1495         if (!RHSReg)
1496           return 0;
1497         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1498         return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1499                                 RHSIsKill, ShiftVal);
1500       }
1501
1502   unsigned RHSReg = getRegForValue(RHS);
1503   if (!RHSReg)
1504     return 0;
1505   bool RHSIsKill = hasTrivialKill(RHS);
1506
1507   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1508   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1509   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1510     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1511     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1512   }
1513   return ResultReg;
1514 }
1515
1516 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1517                                            unsigned LHSReg, bool LHSIsKill,
1518                                            uint64_t Imm) {
1519   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1520          "ISD nodes are not consecutive!");
1521   static const unsigned OpcTable[3][2] = {
1522     { AArch64::ANDWri, AArch64::ANDXri },
1523     { AArch64::ORRWri, AArch64::ORRXri },
1524     { AArch64::EORWri, AArch64::EORXri }
1525   };
1526   const TargetRegisterClass *RC;
1527   unsigned Opc;
1528   unsigned RegSize;
1529   switch (RetVT.SimpleTy) {
1530   default:
1531     return 0;
1532   case MVT::i1:
1533   case MVT::i8:
1534   case MVT::i16:
1535   case MVT::i32: {
1536     unsigned Idx = ISDOpc - ISD::AND;
1537     Opc = OpcTable[Idx][0];
1538     RC = &AArch64::GPR32spRegClass;
1539     RegSize = 32;
1540     break;
1541   }
1542   case MVT::i64:
1543     Opc = OpcTable[ISDOpc - ISD::AND][1];
1544     RC = &AArch64::GPR64spRegClass;
1545     RegSize = 64;
1546     break;
1547   }
1548
1549   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1550     return 0;
1551
1552   unsigned ResultReg =
1553       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1554                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1555   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1556     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1557     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1558   }
1559   return ResultReg;
1560 }
1561
1562 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1563                                            unsigned LHSReg, bool LHSIsKill,
1564                                            unsigned RHSReg, bool RHSIsKill,
1565                                            uint64_t ShiftImm) {
1566   assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR) &&
1567          "ISD nodes are not consecutive!");
1568   static const unsigned OpcTable[3][2] = {
1569     { AArch64::ANDWrs, AArch64::ANDXrs },
1570     { AArch64::ORRWrs, AArch64::ORRXrs },
1571     { AArch64::EORWrs, AArch64::EORXrs }
1572   };
1573   const TargetRegisterClass *RC;
1574   unsigned Opc;
1575   switch (RetVT.SimpleTy) {
1576   default:
1577     return 0;
1578   case MVT::i1:
1579   case MVT::i8:
1580   case MVT::i16:
1581   case MVT::i32:
1582     Opc = OpcTable[ISDOpc - ISD::AND][0];
1583     RC = &AArch64::GPR32RegClass;
1584     break;
1585   case MVT::i64:
1586     Opc = OpcTable[ISDOpc - ISD::AND][1];
1587     RC = &AArch64::GPR64RegClass;
1588     break;
1589   }
1590   unsigned ResultReg =
1591       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1592                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1593   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1594     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1595     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1596   }
1597   return ResultReg;
1598 }
1599
1600 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1601                                      uint64_t Imm) {
1602   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1603 }
1604
/// Emit a load of a value of type \p VT from \p Addr.
///
/// \param VT        the type of the value in memory.
/// \param RetVT     the type the caller wants the result in; a 64-bit result is
///                  produced when this is MVT::i64.
/// \param ResultReg receives the register holding the loaded value.
/// \param Addr      the address to load from (passed by value; it is
///                  simplified locally).
/// \param WantZExt  select the zero-extending (true) or sign-extending (false)
///                  load opcodes for integer types.
/// \param MMO       memory operand forwarded to addLoadStoreOperands.
/// \returns false if the address could not be simplified, true otherwise.
bool AArch64FastISel::emitLoad(MVT VT, MVT RetVT, unsigned &ResultReg,
                               Address Addr, bool WantZExt,
                               MachineMemOperand *MMO) {
  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    llvm_unreachable("Unexpected value type.");

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  // An offset that is not a multiple of the scale factor also falls back to
  // the unscaled form.
  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    UseScaled = false;
    ScaleFactor = 1;
  }

  // Integer load opcodes, indexed as
  //   GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][type column]
  // where Idx selects the addressing form (see below) and the type column is
  // 0 = i1/i8, 1 = i16, 2 = i32, 3 = i64.
  static const unsigned GPOpcTable[2][8][4] = {
    // Sign-extend.
    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
        AArch64::LDURXi  },
      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
        AArch64::LDRXui  },
      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
        AArch64::LDRXroW }
    },
    // Zero-extend.
    { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
        AArch64::LDURXi  },
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
        AArch64::LDRXui  },
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
        AArch64::LDRXroW }
    }
  };

  // Floating-point load opcodes, indexed as FPOpcTable[Idx][0 = f32, 1 = f64].
  static const unsigned FPOpcTable[4][2] = {
    { AArch64::LDURSi,  AArch64::LDURDi  },
    { AArch64::LDRSui,  AArch64::LDRDui  },
    { AArch64::LDRSroX, AArch64::LDRDroX },
    { AArch64::LDRSroW, AArch64::LDRDroW }
  };

  unsigned Opc;
  const TargetRegisterClass *RC;
  // A register-offset form is used for a register base with a zero immediate
  // offset and both a base and an offset register.
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  // Idx: 0 = unscaled immediate, 1 = scaled immediate, 2 = register offset.
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  // A UXTW/SXTW extend bumps the index by one (2 -> 3 selects the roW forms).
  // NOTE(review): the increment is unconditional; presumably an extend type is
  // only ever set together with a register offset - confirm.
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++;

  bool IsRet64Bit = RetVT == MVT::i64;
  // Pick the opcode and result register class. For the narrow integer types a
  // 64-bit register class is only used for sign-extending loads; a
  // zero-extending load to 64 bits goes through a 32-bit register and is
  // widened further below.
  switch (VT.SimpleTy) {
  default:
    llvm_unreachable("Unexpected value type.");
  case MVT::i1: // Intentional fall-through.
  case MVT::i8:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i16:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i32:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = FPOpcTable[Idx][0];
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = FPOpcTable[Idx][1];
    RC = &AArch64::FPR64RegClass;
    break;
  }

  // Create the base instruction, then add the operands.
  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(Opc), ResultReg);
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);

  // For zero-extending loads to 64 bits we emit a 32-bit load and then convert
  // the w-reg to an x-reg with SUBREG_TO_REG. In the end this is just a no-op
  // and will be removed.
  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(ResultReg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    ResultReg = Reg64;
  }

  // Loading an i1 requires special handling: the loaded value is ANDed with 1
  // (in the width of the result register) so only bit 0 is kept.
  if (VT == MVT::i1) {
    unsigned ANDReg = emitAnd_ri(IsRet64Bit ? MVT::i64 : MVT::i32, ResultReg,
                                 /*IsKill=*/true, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    ResultReg = ANDReg;
  }
  return true;
}
1740
1741 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1742   MVT VT;
1743   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1744     return false;
1745
1746   if (VT.isVector())
1747     return selectOperator(I, I->getOpcode());
1748
1749   unsigned ResultReg;
1750   switch (I->getOpcode()) {
1751   default:
1752     llvm_unreachable("Unexpected instruction.");
1753   case Instruction::Add:
1754     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1755     break;
1756   case Instruction::Sub:
1757     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1758     break;
1759   }
1760   if (!ResultReg)
1761     return false;
1762
1763   updateValueMap(I, ResultReg);
1764   return true;
1765 }
1766
1767 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1768   MVT VT;
1769   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1770     return false;
1771
1772   if (VT.isVector())
1773     return selectOperator(I, I->getOpcode());
1774
1775   unsigned ResultReg;
1776   switch (I->getOpcode()) {
1777   default:
1778     llvm_unreachable("Unexpected instruction.");
1779   case Instruction::And:
1780     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1781     break;
1782   case Instruction::Or:
1783     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1784     break;
1785   case Instruction::Xor:
1786     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1787     break;
1788   }
1789   if (!ResultReg)
1790     return false;
1791
1792   updateValueMap(I, ResultReg);
1793   return true;
1794 }
1795
1796 bool AArch64FastISel::selectLoad(const Instruction *I) {
1797   MVT VT;
1798   // Verify we have a legal type before going any further.  Currently, we handle
1799   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1800   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1801   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1802       cast<LoadInst>(I)->isAtomic())
1803     return false;
1804
1805   // See if we can handle this address.
1806   Address Addr;
1807   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1808     return false;
1809
1810   bool WantZExt = true;
1811   MVT RetVT = VT;
1812   if (I->hasOneUse()) {
1813     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1814       if (!isTypeSupported(ZE->getType(), RetVT, /*IsVectorAllowed=*/false))
1815         RetVT = VT;
1816     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1817       if (!isTypeSupported(SE->getType(), RetVT, /*IsVectorAllowed=*/false))
1818         RetVT = VT;
1819       WantZExt = false;
1820     }
1821   }
1822
1823   unsigned ResultReg;
1824   if (!emitLoad(VT, RetVT, ResultReg, Addr, WantZExt,
1825                 createMachineMemOperandFor(I)))
1826     return false;
1827
1828   updateValueMap(I, ResultReg);
1829   return true;
1830 }
1831
1832 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
1833                                 MachineMemOperand *MMO) {
1834   // Simplify this down to something we can handle.
1835   if (!simplifyAddress(Addr, VT))
1836     return false;
1837
1838   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1839   if (!ScaleFactor)
1840     llvm_unreachable("Unexpected value type.");
1841
1842   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1843   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1844   bool UseScaled = true;
1845   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1846     UseScaled = false;
1847     ScaleFactor = 1;
1848   }
1849
1850   static const unsigned OpcTable[4][6] = {
1851     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
1852       AArch64::STURSi,   AArch64::STURDi },
1853     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
1854       AArch64::STRSui,   AArch64::STRDui },
1855     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
1856       AArch64::STRSroX,  AArch64::STRDroX },
1857     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
1858       AArch64::STRSroW,  AArch64::STRDroW }
1859   };
1860
1861   unsigned Opc;
1862   bool VTIsi1 = false;
1863   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1864                       Addr.getOffsetReg();
1865   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1866   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1867       Addr.getExtendType() == AArch64_AM::SXTW)
1868     Idx++;
1869
1870   switch (VT.SimpleTy) {
1871   default: llvm_unreachable("Unexpected value type.");
1872   case MVT::i1:  VTIsi1 = true;
1873   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
1874   case MVT::i16: Opc = OpcTable[Idx][1]; break;
1875   case MVT::i32: Opc = OpcTable[Idx][2]; break;
1876   case MVT::i64: Opc = OpcTable[Idx][3]; break;
1877   case MVT::f32: Opc = OpcTable[Idx][4]; break;
1878   case MVT::f64: Opc = OpcTable[Idx][5]; break;
1879   }
1880
1881   // Storing an i1 requires special handling.
1882   if (VTIsi1 && SrcReg != AArch64::WZR) {
1883     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
1884     assert(ANDReg && "Unexpected AND instruction emission failure.");
1885     SrcReg = ANDReg;
1886   }
1887   // Create the base instruction, then add the operands.
1888   const MCInstrDesc &II = TII.get(Opc);
1889   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
1890   MachineInstrBuilder MIB =
1891       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
1892   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
1893
1894   return true;
1895 }
1896
1897 bool AArch64FastISel::selectStore(const Instruction *I) {
1898   MVT VT;
1899   const Value *Op0 = I->getOperand(0);
1900   // Verify we have a legal type before going any further.  Currently, we handle
1901   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1902   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1903   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
1904       cast<StoreInst>(I)->isAtomic())
1905     return false;
1906
1907   // Get the value to be stored into a register. Use the zero register directly
1908   // when possible to avoid an unnecessary copy and a wasted register.
1909   unsigned SrcReg = 0;
1910   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
1911     if (CI->isZero())
1912       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1913   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
1914     if (CF->isZero() && !CF->isNegative()) {
1915       VT = MVT::getIntegerVT(VT.getSizeInBits());
1916       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1917     }
1918   }
1919
1920   if (!SrcReg)
1921     SrcReg = getRegForValue(Op0);
1922
1923   if (!SrcReg)
1924     return false;
1925
1926   // See if we can handle this address.
1927   Address Addr;
1928   if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
1929     return false;
1930
1931   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
1932     return false;
1933   return true;
1934 }
1935
1936 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
1937   switch (Pred) {
1938   case CmpInst::FCMP_ONE:
1939   case CmpInst::FCMP_UEQ:
1940   default:
1941     // AL is our "false" for now. The other two need more compares.
1942     return AArch64CC::AL;
1943   case CmpInst::ICMP_EQ:
1944   case CmpInst::FCMP_OEQ:
1945     return AArch64CC::EQ;
1946   case CmpInst::ICMP_SGT:
1947   case CmpInst::FCMP_OGT:
1948     return AArch64CC::GT;
1949   case CmpInst::ICMP_SGE:
1950   case CmpInst::FCMP_OGE:
1951     return AArch64CC::GE;
1952   case CmpInst::ICMP_UGT:
1953   case CmpInst::FCMP_UGT:
1954     return AArch64CC::HI;
1955   case CmpInst::FCMP_OLT:
1956     return AArch64CC::MI;
1957   case CmpInst::ICMP_ULE:
1958   case CmpInst::FCMP_OLE:
1959     return AArch64CC::LS;
1960   case CmpInst::FCMP_ORD:
1961     return AArch64CC::VC;
1962   case CmpInst::FCMP_UNO:
1963     return AArch64CC::VS;
1964   case CmpInst::FCMP_UGE:
1965     return AArch64CC::PL;
1966   case CmpInst::ICMP_SLT:
1967   case CmpInst::FCMP_ULT:
1968     return AArch64CC::LT;
1969   case CmpInst::ICMP_SLE:
1970   case CmpInst::FCMP_ULE:
1971     return AArch64CC::LE;
1972   case CmpInst::FCMP_UNE:
1973   case CmpInst::ICMP_NE:
1974     return AArch64CC::NE;
1975   case CmpInst::ICMP_UGE:
1976     return AArch64CC::HS;
1977   case CmpInst::ICMP_ULT:
1978     return AArch64CC::LO;
1979   }
1980 }
1981
1982 /// \brief Try to emit a combined compare-and-branch instruction.
1983 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
1984   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
1985   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
1986   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1987
1988   const Value *LHS = CI->getOperand(0);
1989   const Value *RHS = CI->getOperand(1);
1990
1991   Type *Ty = LHS->getType();
1992     if (!Ty->isIntegerTy())
1993       return false;
1994
1995   unsigned BW = cast<IntegerType>(Ty)->getBitWidth();
1996   if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64)
1997     return false;
1998
1999   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2000   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2001
2002   // Try to take advantage of fallthrough opportunities.
2003   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2004     std::swap(TBB, FBB);
2005     Predicate = CmpInst::getInversePredicate(Predicate);
2006   }
2007
2008   int TestBit = -1;
2009   bool IsCmpNE;
2010   if ((Predicate == CmpInst::ICMP_EQ) || (Predicate == CmpInst::ICMP_NE)) {
2011     if (const auto *C = dyn_cast<ConstantInt>(LHS))
2012       if (C->isNullValue())
2013         std::swap(LHS, RHS);
2014
2015     if (!isa<ConstantInt>(RHS))
2016       return false;
2017
2018     if (!cast<ConstantInt>(RHS)->isNullValue())
2019       return false;
2020
2021     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2022       if (AI->getOpcode() == Instruction::And) {
2023         const Value *AndLHS = AI->getOperand(0);
2024         const Value *AndRHS = AI->getOperand(1);
2025
2026         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2027           if (C->getValue().isPowerOf2())
2028             std::swap(AndLHS, AndRHS);
2029
2030         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2031           if (C->getValue().isPowerOf2()) {
2032             TestBit = C->getValue().logBase2();
2033             LHS = AndLHS;
2034           }
2035       }
2036     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2037   } else if (Predicate == CmpInst::ICMP_SLT) {
2038     if (!isa<ConstantInt>(RHS))
2039       return false;
2040
2041     if (!cast<ConstantInt>(RHS)->isNullValue())
2042       return false;
2043
2044     TestBit = BW - 1;
2045     IsCmpNE = true;
2046   } else if (Predicate == CmpInst::ICMP_SGT) {
2047     if (!isa<ConstantInt>(RHS))
2048       return false;
2049
2050     if (cast<ConstantInt>(RHS)->getValue() != -1)
2051       return false;
2052
2053     TestBit = BW - 1;
2054     IsCmpNE = false;
2055   } else
2056     return false;
2057
2058   static const unsigned OpcTable[2][2][2] = {
2059     { {AArch64::CBZW,  AArch64::CBZX },
2060       {AArch64::CBNZW, AArch64::CBNZX} },
2061     { {AArch64::TBZW,  AArch64::TBZX },
2062       {AArch64::TBNZW, AArch64::TBNZX} }
2063   };
2064
2065   bool IsBitTest = TestBit != -1;
2066   bool Is64Bit = BW == 64;
2067   if (TestBit < 32 && TestBit >= 0)
2068     Is64Bit = false;
2069
2070   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2071   const MCInstrDesc &II = TII.get(Opc);
2072
2073   unsigned SrcReg = getRegForValue(LHS);
2074   if (!SrcReg)
2075     return false;
2076   bool SrcIsKill = hasTrivialKill(LHS);
2077
2078   if (BW == 64 && !Is64Bit) {
2079     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2080                                         AArch64::sub_32);
2081     SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2082   }
2083
2084   // Emit the combined compare and branch instruction.
2085   MachineInstrBuilder MIB =
2086       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2087           .addReg(SrcReg, getKillRegState(SrcIsKill));
2088   if (IsBitTest)
2089     MIB.addImm(TestBit);
2090   MIB.addMBB(TBB);
2091
2092   // Obtain the branch weight and add the TrueBB to the successor list.
2093   uint32_t BranchWeight = 0;
2094   if (FuncInfo.BPI)
2095     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2096                                                TBB->getBasicBlock());
2097   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2098   fastEmitBranch(FBB, DbgLoc);
2099
2100   return true;
2101 }
2102
2103 bool AArch64FastISel::selectBranch(const Instruction *I) {
2104   const BranchInst *BI = cast<BranchInst>(I);
2105   if (BI->isUnconditional()) {
2106     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2107     fastEmitBranch(MSucc, BI->getDebugLoc());
2108     return true;
2109   }
2110
2111   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2112   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2113
2114   AArch64CC::CondCode CC = AArch64CC::NE;
2115   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2116     if (CI->hasOneUse() && isValueAvailable(CI)) {
2117       // Try to optimize or fold the cmp.
2118       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2119       switch (Predicate) {
2120       default:
2121         break;
2122       case CmpInst::FCMP_FALSE:
2123         fastEmitBranch(FBB, DbgLoc);
2124         return true;
2125       case CmpInst::FCMP_TRUE:
2126         fastEmitBranch(TBB, DbgLoc);
2127         return true;
2128       }
2129
2130       // Try to emit a combined compare-and-branch first.
2131       if (emitCompareAndBranch(BI))
2132         return true;
2133
2134       // Try to take advantage of fallthrough opportunities.
2135       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2136         std::swap(TBB, FBB);
2137         Predicate = CmpInst::getInversePredicate(Predicate);
2138       }
2139
2140       // Emit the cmp.
2141       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2142         return false;
2143
2144       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2145       // instruction.
2146       CC = getCompareCC(Predicate);
2147       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2148       switch (Predicate) {
2149       default:
2150         break;
2151       case CmpInst::FCMP_UEQ:
2152         ExtraCC = AArch64CC::EQ;
2153         CC = AArch64CC::VS;
2154         break;
2155       case CmpInst::FCMP_ONE:
2156         ExtraCC = AArch64CC::MI;
2157         CC = AArch64CC::GT;
2158         break;
2159       }
2160       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2161
2162       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2163       if (ExtraCC != AArch64CC::AL) {
2164         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2165             .addImm(ExtraCC)
2166             .addMBB(TBB);
2167       }
2168
2169       // Emit the branch.
2170       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2171           .addImm(CC)
2172           .addMBB(TBB);
2173
2174       // Obtain the branch weight and add the TrueBB to the successor list.
2175       uint32_t BranchWeight = 0;
2176       if (FuncInfo.BPI)
2177         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2178                                                   TBB->getBasicBlock());
2179       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2180
2181       fastEmitBranch(FBB, DbgLoc);
2182       return true;
2183     }
2184   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
2185     MVT SrcVT;
2186     if (TI->hasOneUse() && isValueAvailable(TI) &&
2187         isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
2188       unsigned CondReg = getRegForValue(TI->getOperand(0));
2189       if (!CondReg)
2190         return false;
2191       bool CondIsKill = hasTrivialKill(TI->getOperand(0));
2192
2193       // Issue an extract_subreg to get the lower 32-bits.
2194       if (SrcVT == MVT::i64) {
2195         CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
2196                                              AArch64::sub_32);
2197         CondIsKill = true;
2198       }
2199
2200       unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
2201       assert(ANDReg && "Unexpected AND instruction emission failure.");
2202       emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
2203
2204       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2205         std::swap(TBB, FBB);
2206         CC = AArch64CC::EQ;
2207       }
2208       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2209           .addImm(CC)
2210           .addMBB(TBB);
2211
2212       // Obtain the branch weight and add the TrueBB to the successor list.
2213       uint32_t BranchWeight = 0;
2214       if (FuncInfo.BPI)
2215         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2216                                                   TBB->getBasicBlock());
2217       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2218
2219       fastEmitBranch(FBB, DbgLoc);
2220       return true;
2221     }
2222   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2223     uint64_t Imm = CI->getZExtValue();
2224     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2225     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2226         .addMBB(Target);
2227
2228     // Obtain the branch weight and add the target to the successor list.
2229     uint32_t BranchWeight = 0;
2230     if (FuncInfo.BPI)
2231       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2232                                                  Target->getBasicBlock());
2233     FuncInfo.MBB->addSuccessor(Target, BranchWeight);
2234     return true;
2235   } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2236     // Fake request the condition, otherwise the intrinsic might be completely
2237     // optimized away.
2238     unsigned CondReg = getRegForValue(BI->getCondition());
2239     if (!CondReg)
2240       return false;
2241
2242     // Emit the branch.
2243     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2244       .addImm(CC)
2245       .addMBB(TBB);
2246
2247     // Obtain the branch weight and add the TrueBB to the successor list.
2248     uint32_t BranchWeight = 0;
2249     if (FuncInfo.BPI)
2250       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2251                                                  TBB->getBasicBlock());
2252     FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2253
2254     fastEmitBranch(FBB, DbgLoc);
2255     return true;
2256   }
2257
2258   unsigned CondReg = getRegForValue(BI->getCondition());
2259   if (CondReg == 0)
2260     return false;
2261   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2262
2263   // We've been divorced from our compare!  Our block was split, and
2264   // now our compare lives in a predecessor block.  We musn't
2265   // re-compare here, as the children of the compare aren't guaranteed
2266   // live across the block boundary (we *could* check for this).
2267   // Regardless, the compare has been done in the predecessor block,
2268   // and it left a value for us in a virtual register.  Ergo, we test
2269   // the one-bit value left in the virtual register.
2270   emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
2271
2272   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2273     std::swap(TBB, FBB);
2274     CC = AArch64CC::EQ;
2275   }
2276
2277   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2278       .addImm(CC)
2279       .addMBB(TBB);
2280
2281   // Obtain the branch weight and add the TrueBB to the successor list.
2282   uint32_t BranchWeight = 0;
2283   if (FuncInfo.BPI)
2284     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
2285                                                TBB->getBasicBlock());
2286   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
2287
2288   fastEmitBranch(FBB, DbgLoc);
2289   return true;
2290 }
2291
2292 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2293   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2294   unsigned AddrReg = getRegForValue(BI->getOperand(0));
2295   if (AddrReg == 0)
2296     return false;
2297
2298   // Emit the indirect branch.
2299   const MCInstrDesc &II = TII.get(AArch64::BR);
2300   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2301   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2302
2303   // Make sure the CFG is up-to-date.
2304   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
2305     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
2306
2307   return true;
2308 }
2309
2310 bool AArch64FastISel::selectCmp(const Instruction *I) {
2311   const CmpInst *CI = cast<CmpInst>(I);
2312
2313   // Try to optimize or fold the cmp.
2314   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2315   unsigned ResultReg = 0;
2316   switch (Predicate) {
2317   default:
2318     break;
2319   case CmpInst::FCMP_FALSE:
2320     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2321     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2322             TII.get(TargetOpcode::COPY), ResultReg)
2323         .addReg(AArch64::WZR, getKillRegState(true));
2324     break;
2325   case CmpInst::FCMP_TRUE:
2326     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2327     break;
2328   }
2329
2330   if (ResultReg) {
2331     updateValueMap(I, ResultReg);
2332     return true;
2333   }
2334
2335   // Emit the cmp.
2336   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2337     return false;
2338
2339   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2340
2341   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2342   // condition codes are inverted, because they are used by CSINC.
2343   static unsigned CondCodeTable[2][2] = {
2344     { AArch64CC::NE, AArch64CC::VC },
2345     { AArch64CC::PL, AArch64CC::LE }
2346   };
2347   unsigned *CondCodes = nullptr;
2348   switch (Predicate) {
2349   default:
2350     break;
2351   case CmpInst::FCMP_UEQ:
2352     CondCodes = &CondCodeTable[0][0];
2353     break;
2354   case CmpInst::FCMP_ONE:
2355     CondCodes = &CondCodeTable[1][0];
2356     break;
2357   }
2358
2359   if (CondCodes) {
2360     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2361     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2362             TmpReg1)
2363         .addReg(AArch64::WZR, getKillRegState(true))
2364         .addReg(AArch64::WZR, getKillRegState(true))
2365         .addImm(CondCodes[0]);
2366     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2367             ResultReg)
2368         .addReg(TmpReg1, getKillRegState(true))
2369         .addReg(AArch64::WZR, getKillRegState(true))
2370         .addImm(CondCodes[1]);
2371
2372     updateValueMap(I, ResultReg);
2373     return true;
2374   }
2375
2376   // Now set a register based on the comparison.
2377   AArch64CC::CondCode CC = getCompareCC(Predicate);
2378   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2379   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2380   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2381           ResultReg)
2382       .addReg(AArch64::WZR, getKillRegState(true))
2383       .addReg(AArch64::WZR, getKillRegState(true))
2384       .addImm(invertedCC);
2385
2386   updateValueMap(I, ResultReg);
2387   return true;
2388 }
2389
2390 bool AArch64FastISel::selectSelect(const Instruction *I) {
2391   const SelectInst *SI = cast<SelectInst>(I);
2392
2393   EVT DestEVT = TLI.getValueType(SI->getType(), true);
2394   if (!DestEVT.isSimple())
2395     return false;
2396
2397   MVT DestVT = DestEVT.getSimpleVT();
2398   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
2399       DestVT != MVT::f64)
2400     return false;
2401
2402   unsigned SelectOpc;
2403   const TargetRegisterClass *RC = nullptr;
2404   switch (DestVT.SimpleTy) {
2405   default: return false;
2406   case MVT::i32:
2407     SelectOpc = AArch64::CSELWr;    RC = &AArch64::GPR32RegClass; break;
2408   case MVT::i64:
2409     SelectOpc = AArch64::CSELXr;    RC = &AArch64::GPR64RegClass; break;
2410   case MVT::f32:
2411     SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break;
2412   case MVT::f64:
2413     SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break;
2414   }
2415
2416   const Value *Cond = SI->getCondition();
2417   bool NeedTest = true;
2418   AArch64CC::CondCode CC = AArch64CC::NE;
2419   if (foldXALUIntrinsic(CC, I, Cond))
2420     NeedTest = false;
2421
2422   unsigned CondReg = getRegForValue(Cond);
2423   if (!CondReg)
2424     return false;
2425   bool CondIsKill = hasTrivialKill(Cond);
2426
2427   if (NeedTest) {
2428     unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
2429     assert(ANDReg && "Unexpected AND instruction emission failure.");
2430     emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
2431   }
2432
2433   unsigned TrueReg = getRegForValue(SI->getTrueValue());
2434   bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
2435
2436   unsigned FalseReg = getRegForValue(SI->getFalseValue());
2437   bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
2438
2439   if (!TrueReg || !FalseReg)
2440     return false;
2441
2442   unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill,
2443                                         FalseReg, FalseIsKill, CC);
2444   updateValueMap(I, ResultReg);
2445   return true;
2446 }
2447
2448 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2449   Value *V = I->getOperand(0);
2450   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2451     return false;
2452
2453   unsigned Op = getRegForValue(V);
2454   if (Op == 0)
2455     return false;
2456
2457   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2458   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2459           ResultReg).addReg(Op);
2460   updateValueMap(I, ResultReg);
2461   return true;
2462 }
2463
2464 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2465   Value *V = I->getOperand(0);
2466   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2467     return false;
2468
2469   unsigned Op = getRegForValue(V);
2470   if (Op == 0)
2471     return false;
2472
2473   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2474   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2475           ResultReg).addReg(Op);
2476   updateValueMap(I, ResultReg);
2477   return true;
2478 }
2479
2480 // FPToUI and FPToSI
2481 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2482   MVT DestVT;
2483   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2484     return false;
2485
2486   unsigned SrcReg = getRegForValue(I->getOperand(0));
2487   if (SrcReg == 0)
2488     return false;
2489
2490   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2491   if (SrcVT == MVT::f128)
2492     return false;
2493
2494   unsigned Opc;
2495   if (SrcVT == MVT::f64) {
2496     if (Signed)
2497       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2498     else
2499       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2500   } else {
2501     if (Signed)
2502       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2503     else
2504       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2505   }
2506   unsigned ResultReg = createResultReg(
2507       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2508   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2509       .addReg(SrcReg);
2510   updateValueMap(I, ResultReg);
2511   return true;
2512 }
2513
2514 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2515   MVT DestVT;
2516   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2517     return false;
2518   assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2519           "Unexpected value type.");
2520
2521   unsigned SrcReg = getRegForValue(I->getOperand(0));
2522   if (!SrcReg)
2523     return false;
2524   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2525
2526   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2527
2528   // Handle sign-extension.
2529   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2530     SrcReg =
2531         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2532     if (!SrcReg)
2533       return false;
2534     SrcIsKill = true;
2535   }
2536
2537   unsigned Opc;
2538   if (SrcVT == MVT::i64) {
2539     if (Signed)
2540       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2541     else
2542       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2543   } else {
2544     if (Signed)
2545       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2546     else
2547       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2548   }
2549
2550   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2551                                       SrcIsKill);
2552   updateValueMap(I, ResultReg);
2553   return true;
2554 }
2555
2556 bool AArch64FastISel::fastLowerArguments() {
2557   if (!FuncInfo.CanLowerReturn)
2558     return false;
2559
2560   const Function *F = FuncInfo.Fn;
2561   if (F->isVarArg())
2562     return false;
2563
2564   CallingConv::ID CC = F->getCallingConv();
2565   if (CC != CallingConv::C)
2566     return false;
2567
2568   // Only handle simple cases of up to 8 GPR and FPR each.
2569   unsigned GPRCnt = 0;
2570   unsigned FPRCnt = 0;
2571   unsigned Idx = 0;
2572   for (auto const &Arg : F->args()) {
2573     // The first argument is at index 1.
2574     ++Idx;
2575     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
2576         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
2577         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
2578         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
2579       return false;
2580
2581     Type *ArgTy = Arg.getType();
2582     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2583       return false;
2584
2585     EVT ArgVT = TLI.getValueType(ArgTy);
2586     if (!ArgVT.isSimple())
2587       return false;
2588
2589     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2590     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2591       return false;
2592
2593     if (VT.isVector() &&
2594         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2595       return false;
2596
2597     if (VT >= MVT::i1 && VT <= MVT::i64)
2598       ++GPRCnt;
2599     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2600              VT.is128BitVector())
2601       ++FPRCnt;
2602     else
2603       return false;
2604
2605     if (GPRCnt > 8 || FPRCnt > 8)
2606       return false;
2607   }
2608
2609   static const MCPhysReg Registers[6][8] = {
2610     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2611       AArch64::W5, AArch64::W6, AArch64::W7 },
2612     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2613       AArch64::X5, AArch64::X6, AArch64::X7 },
2614     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2615       AArch64::H5, AArch64::H6, AArch64::H7 },
2616     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2617       AArch64::S5, AArch64::S6, AArch64::S7 },
2618     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2619       AArch64::D5, AArch64::D6, AArch64::D7 },
2620     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2621       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2622   };
2623
2624   unsigned GPRIdx = 0;
2625   unsigned FPRIdx = 0;
2626   for (auto const &Arg : F->args()) {
2627     MVT VT = TLI.getSimpleValueType(Arg.getType());
2628     unsigned SrcReg;
2629     const TargetRegisterClass *RC;
2630     if (VT >= MVT::i1 && VT <= MVT::i32) {
2631       SrcReg = Registers[0][GPRIdx++];
2632       RC = &AArch64::GPR32RegClass;
2633       VT = MVT::i32;
2634     } else if (VT == MVT::i64) {
2635       SrcReg = Registers[1][GPRIdx++];
2636       RC = &AArch64::GPR64RegClass;
2637     } else if (VT == MVT::f16) {
2638       SrcReg = Registers[2][FPRIdx++];
2639       RC = &AArch64::FPR16RegClass;
2640     } else if (VT ==  MVT::f32) {
2641       SrcReg = Registers[3][FPRIdx++];
2642       RC = &AArch64::FPR32RegClass;
2643     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2644       SrcReg = Registers[4][FPRIdx++];
2645       RC = &AArch64::FPR64RegClass;
2646     } else if (VT.is128BitVector()) {
2647       SrcReg = Registers[5][FPRIdx++];
2648       RC = &AArch64::FPR128RegClass;
2649     } else
2650       llvm_unreachable("Unexpected value type.");
2651
2652     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2653     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2654     // Without this, EmitLiveInCopies may eliminate the livein if its only
2655     // use is a bitcast (which isn't turned into an instruction).
2656     unsigned ResultReg = createResultReg(RC);
2657     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2658             TII.get(TargetOpcode::COPY), ResultReg)
2659         .addReg(DstReg, getKillRegState(true));
2660     updateValueMap(&Arg, ResultReg);
2661   }
2662   return true;
2663 }
2664
2665 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2666                                       SmallVectorImpl<MVT> &OutVTs,
2667                                       unsigned &NumBytes) {
2668   CallingConv::ID CC = CLI.CallConv;
2669   SmallVector<CCValAssign, 16> ArgLocs;
2670   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2671   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2672
2673   // Get a count of how many bytes are to be pushed on the stack.
2674   NumBytes = CCInfo.getNextStackOffset();
2675
2676   // Issue CALLSEQ_START
2677   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
2678   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
2679     .addImm(NumBytes);
2680
2681   // Process the args.
2682   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2683     CCValAssign &VA = ArgLocs[i];
2684     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
2685     MVT ArgVT = OutVTs[VA.getValNo()];
2686
2687     unsigned ArgReg = getRegForValue(ArgVal);
2688     if (!ArgReg)
2689       return false;
2690
2691     // Handle arg promotion: SExt, ZExt, AExt.
2692     switch (VA.getLocInfo()) {
2693     case CCValAssign::Full:
2694       break;
2695     case CCValAssign::SExt: {
2696       MVT DestVT = VA.getLocVT();
2697       MVT SrcVT = ArgVT;
2698       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
2699       if (!ArgReg)
2700         return false;
2701       break;
2702     }
2703     case CCValAssign::AExt:
2704     // Intentional fall-through.
2705     case CCValAssign::ZExt: {
2706       MVT DestVT = VA.getLocVT();
2707       MVT SrcVT = ArgVT;
2708       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
2709       if (!ArgReg)
2710         return false;
2711       break;
2712     }
2713     default:
2714       llvm_unreachable("Unknown arg promotion!");
2715     }
2716
2717     // Now copy/store arg to correct locations.
2718     if (VA.isRegLoc() && !VA.needsCustom()) {
2719       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2720               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
2721       CLI.OutRegs.push_back(VA.getLocReg());
2722     } else if (VA.needsCustom()) {
2723       // FIXME: Handle custom args.
2724       return false;
2725     } else {
2726       assert(VA.isMemLoc() && "Assuming store on stack.");
2727
2728       // Don't emit stores for undef values.
2729       if (isa<UndefValue>(ArgVal))
2730         continue;
2731
2732       // Need to store on the stack.
2733       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
2734
2735       unsigned BEAlign = 0;
2736       if (ArgSize < 8 && !Subtarget->isLittleEndian())
2737         BEAlign = 8 - ArgSize;
2738
2739       Address Addr;
2740       Addr.setKind(Address::RegBase);
2741       Addr.setReg(AArch64::SP);
2742       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
2743
2744       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
2745       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2746         MachinePointerInfo::getStack(Addr.getOffset()),
2747         MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
2748
2749       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
2750         return false;
2751     }
2752   }
2753   return true;
2754 }
2755
2756 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
2757                                  unsigned NumBytes) {
2758   CallingConv::ID CC = CLI.CallConv;
2759
2760   // Issue CALLSEQ_END
2761   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2762   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
2763     .addImm(NumBytes).addImm(0);
2764
2765   // Now the return value.
2766   if (RetVT != MVT::isVoid) {
2767     SmallVector<CCValAssign, 16> RVLocs;
2768     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
2769     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
2770
2771     // Only handle a single return value.
2772     if (RVLocs.size() != 1)
2773       return false;
2774
2775     // Copy all of the result registers out of their specified physreg.
2776     MVT CopyVT = RVLocs[0].getValVT();
2777     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
2778     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2779             TII.get(TargetOpcode::COPY), ResultReg)
2780         .addReg(RVLocs[0].getLocReg());
2781     CLI.InRegs.push_back(RVLocs[0].getLocReg());
2782
2783     CLI.ResultReg = ResultReg;
2784     CLI.NumResultRegs = 1;
2785   }
2786
2787   return true;
2788 }
2789
2790 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
2791   CallingConv::ID CC  = CLI.CallConv;
2792   bool IsTailCall     = CLI.IsTailCall;
2793   bool IsVarArg       = CLI.IsVarArg;
2794   const Value *Callee = CLI.Callee;
2795   const char *SymName = CLI.SymName;
2796
2797   if (!Callee && !SymName)
2798     return false;
2799
2800   // Allow SelectionDAG isel to handle tail calls.
2801   if (IsTailCall)
2802     return false;
2803
2804   CodeModel::Model CM = TM.getCodeModel();
2805   // Only support the small and large code model.
2806   if (CM != CodeModel::Small && CM != CodeModel::Large)
2807     return false;
2808
2809   // FIXME: Add large code model support for ELF.
2810   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
2811     return false;
2812
2813   // Let SDISel handle vararg functions.
2814   if (IsVarArg)
2815     return false;
2816
2817   // FIXME: Only handle *simple* calls for now.
2818   MVT RetVT;
2819   if (CLI.RetTy->isVoidTy())
2820     RetVT = MVT::isVoid;
2821   else if (!isTypeLegal(CLI.RetTy, RetVT))
2822     return false;
2823
2824   for (auto Flag : CLI.OutFlags)
2825     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
2826       return false;
2827
2828   // Set up the argument vectors.
2829   SmallVector<MVT, 16> OutVTs;
2830   OutVTs.reserve(CLI.OutVals.size());
2831
2832   for (auto *Val : CLI.OutVals) {
2833     MVT VT;
2834     if (!isTypeLegal(Val->getType(), VT) &&
2835         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
2836       return false;
2837
2838     // We don't handle vector parameters yet.
2839     if (VT.isVector() || VT.getSizeInBits() > 64)
2840       return false;
2841
2842     OutVTs.push_back(VT);
2843   }
2844
2845   Address Addr;
2846   if (Callee && !computeCallAddress(Callee, Addr))
2847     return false;
2848
2849   // Handle the arguments now that we've gotten them.
2850   unsigned NumBytes;
2851   if (!processCallArgs(CLI, OutVTs, NumBytes))
2852     return false;
2853
2854   // Issue the call.
2855   MachineInstrBuilder MIB;
2856   if (CM == CodeModel::Small) {
2857     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
2858     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
2859     if (SymName)
2860       MIB.addExternalSymbol(SymName, 0);
2861     else if (Addr.getGlobalValue())
2862       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
2863     else if (Addr.getReg()) {
2864       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
2865       MIB.addReg(Reg);
2866     } else
2867       return false;
2868   } else {
2869     unsigned CallReg = 0;
2870     if (SymName) {
2871       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
2872       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
2873               ADRPReg)
2874         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
2875
2876       CallReg = createResultReg(&AArch64::GPR64RegClass);
2877       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
2878               CallReg)
2879         .addReg(ADRPReg)
2880         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
2881                            AArch64II::MO_NC);
2882     } else if (Addr.getGlobalValue())
2883       CallReg = materializeGV(Addr.getGlobalValue());
2884     else if (Addr.getReg())
2885       CallReg = Addr.getReg();
2886
2887     if (!CallReg)
2888       return false;
2889
2890     const MCInstrDesc &II = TII.get(AArch64::BLR);
2891     CallReg = constrainOperandRegClass(II, CallReg, 0);
2892     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
2893   }
2894
2895   // Add implicit physical register uses to the call.
2896   for (auto Reg : CLI.OutRegs)
2897     MIB.addReg(Reg, RegState::Implicit);
2898
2899   // Add a register mask with the call-preserved registers.
2900   // Proper defs for return values will be added by setPhysRegsDeadExcept().
2901   MIB.addRegMask(TRI.getCallPreservedMask(CC));
2902
2903   CLI.Call = MIB;
2904
2905   // Finish off the call including any return values.
2906   return finishCall(CLI, RetVT, NumBytes);
2907 }
2908
2909 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
2910   if (Alignment)
2911     return Len / Alignment <= 4;
2912   else
2913     return Len < 32;
2914 }
2915
2916 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
2917                                          uint64_t Len, unsigned Alignment) {
2918   // Make sure we don't bloat code by inlining very large memcpy's.
2919   if (!isMemCpySmall(Len, Alignment))
2920     return false;
2921
2922   int64_t UnscaledOffset = 0;
2923   Address OrigDest = Dest;
2924   Address OrigSrc = Src;
2925
2926   while (Len) {
2927     MVT VT;
2928     if (!Alignment || Alignment >= 8) {
2929       if (Len >= 8)
2930         VT = MVT::i64;
2931       else if (Len >= 4)
2932         VT = MVT::i32;
2933       else if (Len >= 2)
2934         VT = MVT::i16;
2935       else {
2936         VT = MVT::i8;
2937       }
2938     } else {
2939       // Bound based on alignment.
2940       if (Len >= 4 && Alignment == 4)
2941         VT = MVT::i32;
2942       else if (Len >= 2 && Alignment == 2)
2943         VT = MVT::i16;
2944       else {
2945         VT = MVT::i8;
2946       }
2947     }
2948
2949     bool RV;
2950     unsigned ResultReg;
2951     RV = emitLoad(VT, VT, ResultReg, Src);
2952     if (!RV)
2953       return false;
2954
2955     RV = emitStore(VT, ResultReg, Dest);
2956     if (!RV)
2957       return false;
2958
2959     int64_t Size = VT.getSizeInBits() / 8;
2960     Len -= Size;
2961     UnscaledOffset += Size;
2962
2963     // We need to recompute the unscaled offset for each iteration.
2964     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
2965     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
2966   }
2967
2968   return true;
2969 }
2970
2971 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
2972 /// into the user. The condition code will only be updated on success.
2973 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
2974                                         const Instruction *I,
2975                                         const Value *Cond) {
2976   if (!isa<ExtractValueInst>(Cond))
2977     return false;
2978
2979   const auto *EV = cast<ExtractValueInst>(Cond);
2980   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
2981     return false;
2982
2983   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
2984   MVT RetVT;
2985   const Function *Callee = II->getCalledFunction();
2986   Type *RetTy =
2987   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
2988   if (!isTypeLegal(RetTy, RetVT))
2989     return false;
2990
2991   if (RetVT != MVT::i32 && RetVT != MVT::i64)
2992     return false;
2993
2994   const Value *LHS = II->getArgOperand(0);
2995   const Value *RHS = II->getArgOperand(1);
2996
2997   // Canonicalize immediate to the RHS.
2998   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2999       isCommutativeIntrinsic(II))
3000     std::swap(LHS, RHS);
3001
3002   // Simplify multiplies.
3003   unsigned IID = II->getIntrinsicID();
3004   switch (IID) {
3005   default:
3006     break;
3007   case Intrinsic::smul_with_overflow:
3008     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3009       if (C->getValue() == 2)
3010         IID = Intrinsic::sadd_with_overflow;
3011     break;
3012   case Intrinsic::umul_with_overflow:
3013     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3014       if (C->getValue() == 2)
3015         IID = Intrinsic::uadd_with_overflow;
3016     break;
3017   }
3018
3019   AArch64CC::CondCode TmpCC;
3020   switch (IID) {
3021   default:
3022     return false;
3023   case Intrinsic::sadd_with_overflow:
3024   case Intrinsic::ssub_with_overflow:
3025     TmpCC = AArch64CC::VS;
3026     break;
3027   case Intrinsic::uadd_with_overflow:
3028     TmpCC = AArch64CC::HS;
3029     break;
3030   case Intrinsic::usub_with_overflow:
3031     TmpCC = AArch64CC::LO;
3032     break;
3033   case Intrinsic::smul_with_overflow:
3034   case Intrinsic::umul_with_overflow:
3035     TmpCC = AArch64CC::NE;
3036     break;
3037   }
3038
3039   // Check if both instructions are in the same basic block.
3040   if (!isValueAvailable(II))
3041     return false;
3042
3043   // Make sure nothing is in the way
3044   BasicBlock::const_iterator Start = I;
3045   BasicBlock::const_iterator End = II;
3046   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3047     // We only expect extractvalue instructions between the intrinsic and the
3048     // instruction to be selected.
3049     if (!isa<ExtractValueInst>(Itr))
3050       return false;
3051
3052     // Check that the extractvalue operand comes from the intrinsic.
3053     const auto *EVI = cast<ExtractValueInst>(Itr);
3054     if (EVI->getAggregateOperand() != II)
3055       return false;
3056   }
3057
3058   CC = TmpCC;
3059   return true;
3060 }
3061
3062 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3063   // FIXME: Handle more intrinsics.
3064   switch (II->getIntrinsicID()) {
3065   default: return false;
3066   case Intrinsic::frameaddress: {
3067     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
3068     MFI->setFrameAddressIsTaken(true);
3069
3070     const AArch64RegisterInfo *RegInfo =
3071         static_cast<const AArch64RegisterInfo *>(
3072             TM.getSubtargetImpl()->getRegisterInfo());
3073     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3074     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3075     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3076             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3077     // Recursively load frame address
3078     // ldr x0, [fp]
3079     // ldr x0, [x0]
3080     // ldr x0, [x0]
3081     // ...
3082     unsigned DestReg;
3083     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3084     while (Depth--) {
3085       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3086                                 SrcReg, /*IsKill=*/true, 0);
3087       assert(DestReg && "Unexpected LDR instruction emission failure.");
3088       SrcReg = DestReg;
3089     }
3090
3091     updateValueMap(II, SrcReg);
3092     return true;
3093   }
3094   case Intrinsic::memcpy:
3095   case Intrinsic::memmove: {
3096     const auto *MTI = cast<MemTransferInst>(II);
3097     // Don't handle volatile.
3098     if (MTI->isVolatile())
3099       return false;
3100
3101     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
3102     // we would emit dead code because we don't currently handle memmoves.
3103     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3104     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3105       // Small memcpy's are common enough that we want to do them without a call
3106       // if possible.
3107       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3108       unsigned Alignment = MTI->getAlignment();
3109       if (isMemCpySmall(Len, Alignment)) {
3110         Address Dest, Src;
3111         if (!computeAddress(MTI->getRawDest(), Dest) ||
3112             !computeAddress(MTI->getRawSource(), Src))
3113           return false;
3114         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3115           return true;
3116       }
3117     }
3118
3119     if (!MTI->getLength()->getType()->isIntegerTy(64))
3120       return false;
3121
3122     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3123       // Fast instruction selection doesn't support the special
3124       // address spaces.
3125       return false;
3126
3127     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3128     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
3129   }
3130   case Intrinsic::memset: {
3131     const MemSetInst *MSI = cast<MemSetInst>(II);
3132     // Don't handle volatile.
3133     if (MSI->isVolatile())
3134       return false;
3135
3136     if (!MSI->getLength()->getType()->isIntegerTy(64))
3137       return false;
3138
3139     if (MSI->getDestAddressSpace() > 255)
3140       // Fast instruction selection doesn't support the special
3141       // address spaces.
3142       return false;
3143
3144     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
3145   }
3146   case Intrinsic::sin:
3147   case Intrinsic::cos:
3148   case Intrinsic::pow: {
3149     MVT RetVT;
3150     if (!isTypeLegal(II->getType(), RetVT))
3151       return false;
3152
3153     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3154       return false;
3155
3156     static const RTLIB::Libcall LibCallTable[3][2] = {
3157       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3158       { RTLIB::COS_F32, RTLIB::COS_F64 },
3159       { RTLIB::POW_F32, RTLIB::POW_F64 }
3160     };
3161     RTLIB::Libcall LC;
3162     bool Is64Bit = RetVT == MVT::f64;
3163     switch (II->getIntrinsicID()) {
3164     default:
3165       llvm_unreachable("Unexpected intrinsic.");
3166     case Intrinsic::sin:
3167       LC = LibCallTable[0][Is64Bit];
3168       break;
3169     case Intrinsic::cos:
3170       LC = LibCallTable[1][Is64Bit];
3171       break;
3172     case Intrinsic::pow:
3173       LC = LibCallTable[2][Is64Bit];
3174       break;
3175     }
3176
3177     ArgListTy Args;
3178     Args.reserve(II->getNumArgOperands());
3179
3180     // Populate the argument list.
3181     for (auto &Arg : II->arg_operands()) {
3182       ArgListEntry Entry;
3183       Entry.Val = Arg;
3184       Entry.Ty = Arg->getType();
3185       Args.push_back(Entry);
3186     }
3187
3188     CallLoweringInfo CLI;
3189     CLI.setCallee(TLI.getLibcallCallingConv(LC), II->getType(),
3190                   TLI.getLibcallName(LC), std::move(Args));
3191     if (!lowerCallTo(CLI))
3192       return false;
3193     updateValueMap(II, CLI.ResultReg);
3194     return true;
3195   }
3196   case Intrinsic::trap: {
3197     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3198         .addImm(1);
3199     return true;
3200   }
3201   case Intrinsic::sqrt: {
3202     Type *RetTy = II->getCalledFunction()->getReturnType();
3203
3204     MVT VT;
3205     if (!isTypeLegal(RetTy, VT))
3206       return false;
3207
3208     unsigned Op0Reg = getRegForValue(II->getOperand(0));
3209     if (!Op0Reg)
3210       return false;
3211     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3212
3213     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3214     if (!ResultReg)
3215       return false;
3216
3217     updateValueMap(II, ResultReg);
3218     return true;
3219   }
3220   case Intrinsic::sadd_with_overflow:
3221   case Intrinsic::uadd_with_overflow:
3222   case Intrinsic::ssub_with_overflow:
3223   case Intrinsic::usub_with_overflow:
3224   case Intrinsic::smul_with_overflow:
3225   case Intrinsic::umul_with_overflow: {
3226     // This implements the basic lowering of the xalu with overflow intrinsics.
3227     const Function *Callee = II->getCalledFunction();
3228     auto *Ty = cast<StructType>(Callee->getReturnType());
3229     Type *RetTy = Ty->getTypeAtIndex(0U);
3230
3231     MVT VT;
3232     if (!isTypeLegal(RetTy, VT))
3233       return false;
3234
3235     if (VT != MVT::i32 && VT != MVT::i64)
3236       return false;
3237
3238     const Value *LHS = II->getArgOperand(0);
3239     const Value *RHS = II->getArgOperand(1);
3240     // Canonicalize immediate to the RHS.
3241     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3242         isCommutativeIntrinsic(II))
3243       std::swap(LHS, RHS);
3244
3245     // Simplify multiplies.
3246     unsigned IID = II->getIntrinsicID();
3247     switch (IID) {
3248     default:
3249       break;
3250     case Intrinsic::smul_with_overflow:
3251       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3252         if (C->getValue() == 2) {
3253           IID = Intrinsic::sadd_with_overflow;
3254           RHS = LHS;
3255         }
3256       break;
3257     case Intrinsic::umul_with_overflow:
3258       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3259         if (C->getValue() == 2) {
3260           IID = Intrinsic::uadd_with_overflow;
3261           RHS = LHS;
3262         }
3263       break;
3264     }
3265
3266     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3267     AArch64CC::CondCode CC = AArch64CC::Invalid;
3268     switch (IID) {
3269     default: llvm_unreachable("Unexpected intrinsic!");
3270     case Intrinsic::sadd_with_overflow:
3271       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3272       CC = AArch64CC::VS;
3273       break;
3274     case Intrinsic::uadd_with_overflow:
3275       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3276       CC = AArch64CC::HS;
3277       break;
3278     case Intrinsic::ssub_with_overflow:
3279       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3280       CC = AArch64CC::VS;
3281       break;
3282     case Intrinsic::usub_with_overflow:
3283       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3284       CC = AArch64CC::LO;
3285       break;
3286     case Intrinsic::smul_with_overflow: {
3287       CC = AArch64CC::NE;
3288       unsigned LHSReg = getRegForValue(LHS);
3289       if (!LHSReg)
3290         return false;
3291       bool LHSIsKill = hasTrivialKill(LHS);
3292
3293       unsigned RHSReg = getRegForValue(RHS);
3294       if (!RHSReg)
3295         return false;
3296       bool RHSIsKill = hasTrivialKill(RHS);
3297
3298       if (VT == MVT::i32) {
3299         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3300         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3301                                        /*IsKill=*/false, 32);
3302         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3303                                             AArch64::sub_32);
3304         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3305                                               AArch64::sub_32);
3306         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3307                     AArch64_AM::ASR, 31, /*WantResult=*/false);
3308       } else {
3309         assert(VT == MVT::i64 && "Unexpected value type.");
3310         MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3311         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3312                                         RHSReg, RHSIsKill);
3313         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3314                     AArch64_AM::ASR, 63, /*WantResult=*/false);
3315       }
3316       break;
3317     }
3318     case Intrinsic::umul_with_overflow: {
3319       CC = AArch64CC::NE;
3320       unsigned LHSReg = getRegForValue(LHS);
3321       if (!LHSReg)
3322         return false;
3323       bool LHSIsKill = hasTrivialKill(LHS);
3324
3325       unsigned RHSReg = getRegForValue(RHS);
3326       if (!RHSReg)
3327         return false;
3328       bool RHSIsKill = hasTrivialKill(RHS);
3329
3330       if (VT == MVT::i32) {
3331         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3332         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3333                     /*IsKill=*/false, AArch64_AM::LSR, 32,
3334                     /*WantResult=*/false);
3335         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3336                                             AArch64::sub_32);
3337       } else {
3338         assert(VT == MVT::i64 && "Unexpected value type.");
3339         MulReg = emitMul_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3340         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3341                                         RHSReg, RHSIsKill);
3342         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3343                     /*IsKill=*/false, /*WantResult=*/false);
3344       }
3345       break;
3346     }
3347     }
3348
3349     if (MulReg) {
3350       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3351       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3352               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3353     }
3354
3355     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3356                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3357                                   /*IsKill=*/true, getInvertedCondCode(CC));
3358     (void)ResultReg2;
3359     assert((ResultReg1 + 1) == ResultReg2 &&
3360            "Nonconsecutive result registers.");
3361     updateValueMap(II, ResultReg1, 2);
3362     return true;
3363   }
3364   }
3365   return false;
3366 }
3367
3368 bool AArch64FastISel::selectRet(const Instruction *I) {
3369   const ReturnInst *Ret = cast<ReturnInst>(I);
3370   const Function &F = *I->getParent()->getParent();
3371
3372   if (!FuncInfo.CanLowerReturn)
3373     return false;
3374
3375   if (F.isVarArg())
3376     return false;
3377
3378   // Build a list of return value registers.
3379   SmallVector<unsigned, 4> RetRegs;
3380
3381   if (Ret->getNumOperands() > 0) {
3382     CallingConv::ID CC = F.getCallingConv();
3383     SmallVector<ISD::OutputArg, 4> Outs;
3384     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
3385
3386     // Analyze operands of the call, assigning locations to each operand.
3387     SmallVector<CCValAssign, 16> ValLocs;
3388     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3389     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3390                                                      : RetCC_AArch64_AAPCS;
3391     CCInfo.AnalyzeReturn(Outs, RetCC);
3392
3393     // Only handle a single return value for now.
3394     if (ValLocs.size() != 1)
3395       return false;
3396
3397     CCValAssign &VA = ValLocs[0];
3398     const Value *RV = Ret->getOperand(0);
3399
3400     // Don't bother handling odd stuff for now.
3401     if ((VA.getLocInfo() != CCValAssign::Full) &&
3402         (VA.getLocInfo() != CCValAssign::BCvt))
3403       return false;
3404
3405     // Only handle register returns for now.
3406     if (!VA.isRegLoc())
3407       return false;
3408
3409     unsigned Reg = getRegForValue(RV);
3410     if (Reg == 0)
3411       return false;
3412
3413     unsigned SrcReg = Reg + VA.getValNo();
3414     unsigned DestReg = VA.getLocReg();
3415     // Avoid a cross-class copy. This is very unlikely.
3416     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3417       return false;
3418
3419     EVT RVEVT = TLI.getValueType(RV->getType());
3420     if (!RVEVT.isSimple())
3421       return false;
3422
3423     // Vectors (of > 1 lane) in big endian need tricky handling.
3424     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3425         !Subtarget->isLittleEndian())
3426       return false;
3427
3428     MVT RVVT = RVEVT.getSimpleVT();
3429     if (RVVT == MVT::f128)
3430       return false;
3431
3432     MVT DestVT = VA.getValVT();
3433     // Special handling for extended integers.
3434     if (RVVT != DestVT) {
3435       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3436         return false;
3437
3438       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3439         return false;
3440
3441       bool IsZExt = Outs[0].Flags.isZExt();
3442       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3443       if (SrcReg == 0)
3444         return false;
3445     }
3446
3447     // Make the copy.
3448     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3449             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3450
3451     // Add register to return instruction.
3452     RetRegs.push_back(VA.getLocReg());
3453   }
3454
3455   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3456                                     TII.get(AArch64::RET_ReallyLR));
3457   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
3458     MIB.addReg(RetRegs[i], RegState::Implicit);
3459   return true;
3460 }
3461
3462 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3463   Type *DestTy = I->getType();
3464   Value *Op = I->getOperand(0);
3465   Type *SrcTy = Op->getType();
3466
3467   EVT SrcEVT = TLI.getValueType(SrcTy, true);
3468   EVT DestEVT = TLI.getValueType(DestTy, true);
3469   if (!SrcEVT.isSimple())
3470     return false;
3471   if (!DestEVT.isSimple())
3472     return false;
3473
3474   MVT SrcVT = SrcEVT.getSimpleVT();
3475   MVT DestVT = DestEVT.getSimpleVT();
3476
3477   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3478       SrcVT != MVT::i8)
3479     return false;
3480   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3481       DestVT != MVT::i1)
3482     return false;
3483
3484   unsigned SrcReg = getRegForValue(Op);
3485   if (!SrcReg)
3486     return false;
3487   bool SrcIsKill = hasTrivialKill(Op);
3488
3489   // If we're truncating from i64 to a smaller non-legal type then generate an
3490   // AND. Otherwise, we know the high bits are undefined and a truncate only
3491   // generate a COPY. We cannot mark the source register also as result
3492   // register, because this can incorrectly transfer the kill flag onto the
3493   // source register.
3494   unsigned ResultReg;
3495   if (SrcVT == MVT::i64) {
3496     uint64_t Mask = 0;
3497     switch (DestVT.SimpleTy) {
3498     default:
3499       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3500       return false;
3501     case MVT::i1:
3502       Mask = 0x1;
3503       break;
3504     case MVT::i8:
3505       Mask = 0xff;
3506       break;
3507     case MVT::i16:
3508       Mask = 0xffff;
3509       break;
3510     }
3511     // Issue an extract_subreg to get the lower 32-bits.
3512     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3513                                                 AArch64::sub_32);
3514     // Create the AND instruction which performs the actual truncation.
3515     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3516     assert(ResultReg && "Unexpected AND instruction emission failure.");
3517   } else {
3518     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3519     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3520             TII.get(TargetOpcode::COPY), ResultReg)
3521         .addReg(SrcReg, getKillRegState(SrcIsKill));
3522   }
3523
3524   updateValueMap(I, ResultReg);
3525   return true;
3526 }
3527
3528 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3529   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3530           DestVT == MVT::i64) &&
3531          "Unexpected value type.");
3532   // Handle i8 and i16 as i32.
3533   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3534     DestVT = MVT::i32;
3535
3536   if (IsZExt) {
3537     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3538     assert(ResultReg && "Unexpected AND instruction emission failure.");
3539     if (DestVT == MVT::i64) {
3540       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3541       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3542       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3543       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3544               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3545           .addImm(0)
3546           .addReg(ResultReg)
3547           .addImm(AArch64::sub_32);
3548       ResultReg = Reg64;
3549     }
3550     return ResultReg;
3551   } else {
3552     if (DestVT == MVT::i64) {
3553       // FIXME: We're SExt i1 to i64.
3554       return 0;
3555     }
3556     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3557                             /*TODO:IsKill=*/false, 0, 0);
3558   }
3559 }
3560
3561 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3562                                       unsigned Op1, bool Op1IsKill) {
3563   unsigned Opc, ZReg;
3564   switch (RetVT.SimpleTy) {
3565   default: return 0;
3566   case MVT::i8:
3567   case MVT::i16:
3568   case MVT::i32:
3569     RetVT = MVT::i32;
3570     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3571   case MVT::i64:
3572     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3573   }
3574
3575   const TargetRegisterClass *RC =
3576       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3577   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
3578                           /*IsKill=*/ZReg, true);
3579 }
3580
3581 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3582                                         unsigned Op1, bool Op1IsKill) {
3583   if (RetVT != MVT::i64)
3584     return 0;
3585
3586   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3587                           Op0, Op0IsKill, Op1, Op1IsKill,
3588                           AArch64::XZR, /*IsKill=*/true);
3589 }
3590
3591 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3592                                         unsigned Op1, bool Op1IsKill) {
3593   if (RetVT != MVT::i64)
3594     return 0;
3595
3596   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3597                           Op0, Op0IsKill, Op1, Op1IsKill,
3598                           AArch64::XZR, /*IsKill=*/true);
3599 }
3600
3601 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3602                                      unsigned Op1Reg, bool Op1IsKill) {
3603   unsigned Opc = 0;
3604   bool NeedTrunc = false;
3605   uint64_t Mask = 0;
3606   switch (RetVT.SimpleTy) {
3607   default: return 0;
3608   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
3609   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
3610   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
3611   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
3612   }
3613
3614   const TargetRegisterClass *RC =
3615       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3616   if (NeedTrunc) {
3617     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3618     Op1IsKill = true;
3619   }
3620   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3621                                        Op1IsKill);
3622   if (NeedTrunc)
3623     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3624   return ResultReg;
3625 }
3626
3627 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3628                                      bool Op0IsKill, uint64_t Shift,
3629                                      bool IsZext) {
3630   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3631          "Unexpected source/return type pair.");
3632   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
3633           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
3634          "Unexpected source value type.");
3635   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3636           RetVT == MVT::i64) && "Unexpected return value type.");
3637
3638   bool Is64Bit = (RetVT == MVT::i64);
3639   unsigned RegSize = Is64Bit ? 64 : 32;
3640   unsigned DstBits = RetVT.getSizeInBits();
3641   unsigned SrcBits = SrcVT.getSizeInBits();
3642
3643   // Don't deal with undefined shifts.
3644   if (Shift >= DstBits)
3645     return 0;
3646
3647   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3648   // {S|U}BFM Wd, Wn, #r, #s
3649   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
3650
3651   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3652   // %2 = shl i16 %1, 4
3653   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
3654   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
3655   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
3656   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
3657
3658   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3659   // %2 = shl i16 %1, 8
3660   // Wd<32+7-24,32-24> = Wn<7:0>
3661   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
3662   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
3663   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
3664
3665   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3666   // %2 = shl i16 %1, 12
3667   // Wd<32+3-20,32-20> = Wn<3:0>
3668   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
3669   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
3670   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
3671
3672   unsigned ImmR = RegSize - Shift;
3673   // Limit the width to the length of the source type.
3674   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
3675   static const unsigned OpcTable[2][2] = {
3676     {AArch64::SBFMWri, AArch64::SBFMXri},
3677     {AArch64::UBFMWri, AArch64::UBFMXri}
3678   };
3679   unsigned Opc = OpcTable[IsZext][Is64Bit];
3680   const TargetRegisterClass *RC =
3681       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3682   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3683     unsigned TmpReg = MRI.createVirtualRegister(RC);
3684     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3685             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3686         .addImm(0)
3687         .addReg(Op0, getKillRegState(Op0IsKill))
3688         .addImm(AArch64::sub_32);
3689     Op0 = TmpReg;
3690     Op0IsKill = true;
3691   }
3692   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3693 }
3694
3695 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3696                                      unsigned Op1Reg, bool Op1IsKill) {
3697   unsigned Opc = 0;
3698   bool NeedTrunc = false;
3699   uint64_t Mask = 0;
3700   switch (RetVT.SimpleTy) {
3701   default: return 0;
3702   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
3703   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
3704   case MVT::i32: Opc = AArch64::LSRVWr; break;
3705   case MVT::i64: Opc = AArch64::LSRVXr; break;
3706   }
3707
3708   const TargetRegisterClass *RC =
3709       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3710   if (NeedTrunc) {
3711     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
3712     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3713     Op0IsKill = Op1IsKill = true;
3714   }
3715   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3716                                        Op1IsKill);
3717   if (NeedTrunc)
3718     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3719   return ResultReg;
3720 }
3721
3722 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3723                                      bool Op0IsKill, uint64_t Shift,
3724                                      bool IsZExt) {
3725   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3726          "Unexpected source/return type pair.");
3727   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3728           SrcVT == MVT::i64) && "Unexpected source value type.");
3729   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3730           RetVT == MVT::i64) && "Unexpected return value type.");
3731
3732   bool Is64Bit = (RetVT == MVT::i64);
3733   unsigned RegSize = Is64Bit ? 64 : 32;
3734   unsigned DstBits = RetVT.getSizeInBits();
3735   unsigned SrcBits = SrcVT.getSizeInBits();
3736
3737   // Don't deal with undefined shifts.
3738   if (Shift >= DstBits)
3739     return 0;
3740
3741   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3742   // {S|U}BFM Wd, Wn, #r, #s
3743   // Wd<s-r:0> = Wn<s:r> when r <= s
3744
3745   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3746   // %2 = lshr i16 %1, 4
3747   // Wd<7-4:0> = Wn<7:4>
3748   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
3749   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3750   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3751
3752   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3753   // %2 = lshr i16 %1, 8
3754   // Wd<7-7,0> = Wn<7:7>
3755   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
3756   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3757   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3758
3759   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3760   // %2 = lshr i16 %1, 12
3761   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3762   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
3763   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3764   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3765
3766   if (Shift >= SrcBits && IsZExt)
3767     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
3768
3769   // It is not possible to fold a sign-extend into the LShr instruction. In this
3770   // case emit a sign-extend.
3771   if (!IsZExt) {
3772     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
3773     if (!Op0)
3774       return 0;
3775     Op0IsKill = true;
3776     SrcVT = RetVT;
3777     SrcBits = SrcVT.getSizeInBits();
3778     IsZExt = true;
3779   }
3780
3781   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3782   unsigned ImmS = SrcBits - 1;
3783   static const unsigned OpcTable[2][2] = {
3784     {AArch64::SBFMWri, AArch64::SBFMXri},
3785     {AArch64::UBFMWri, AArch64::UBFMXri}
3786   };
3787   unsigned Opc = OpcTable[IsZExt][Is64Bit];
3788   const TargetRegisterClass *RC =
3789       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3790   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3791     unsigned TmpReg = MRI.createVirtualRegister(RC);
3792     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3793             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3794         .addImm(0)
3795         .addReg(Op0, getKillRegState(Op0IsKill))
3796         .addImm(AArch64::sub_32);
3797     Op0 = TmpReg;
3798     Op0IsKill = true;
3799   }
3800   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3801 }
3802
3803 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
3804                                      unsigned Op1Reg, bool Op1IsKill) {
3805   unsigned Opc = 0;
3806   bool NeedTrunc = false;
3807   uint64_t Mask = 0;
3808   switch (RetVT.SimpleTy) {
3809   default: return 0;
3810   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
3811   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
3812   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
3813   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
3814   }
3815
3816   const TargetRegisterClass *RC =
3817       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3818   if (NeedTrunc) {
3819     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
3820     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
3821     Op0IsKill = Op1IsKill = true;
3822   }
3823   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
3824                                        Op1IsKill);
3825   if (NeedTrunc)
3826     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
3827   return ResultReg;
3828 }
3829
3830 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
3831                                      bool Op0IsKill, uint64_t Shift,
3832                                      bool IsZExt) {
3833   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
3834          "Unexpected source/return type pair.");
3835   assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
3836           SrcVT == MVT::i64) && "Unexpected source value type.");
3837   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
3838           RetVT == MVT::i64) && "Unexpected return value type.");
3839
3840   bool Is64Bit = (RetVT == MVT::i64);
3841   unsigned RegSize = Is64Bit ? 64 : 32;
3842   unsigned DstBits = RetVT.getSizeInBits();
3843   unsigned SrcBits = SrcVT.getSizeInBits();
3844
3845   // Don't deal with undefined shifts.
3846   if (Shift >= DstBits)
3847     return 0;
3848
3849   // For immediate shifts we can fold the zero-/sign-extension into the shift.
3850   // {S|U}BFM Wd, Wn, #r, #s
3851   // Wd<s-r:0> = Wn<s:r> when r <= s
3852
3853   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3854   // %2 = ashr i16 %1, 4
3855   // Wd<7-4:0> = Wn<7:4>
3856   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
3857   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
3858   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
3859
3860   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3861   // %2 = ashr i16 %1, 8
3862   // Wd<7-7,0> = Wn<7:7>
3863   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3864   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3865   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3866
3867   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
3868   // %2 = ashr i16 %1, 12
3869   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
3870   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
3871   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
3872   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
3873
3874   if (Shift >= SrcBits && IsZExt)
3875     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
3876
3877   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
3878   unsigned ImmS = SrcBits - 1;
3879   static const unsigned OpcTable[2][2] = {
3880     {AArch64::SBFMWri, AArch64::SBFMXri},
3881     {AArch64::UBFMWri, AArch64::UBFMXri}
3882   };
3883   unsigned Opc = OpcTable[IsZExt][Is64Bit];
3884   const TargetRegisterClass *RC =
3885       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3886   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
3887     unsigned TmpReg = MRI.createVirtualRegister(RC);
3888     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3889             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
3890         .addImm(0)
3891         .addReg(Op0, getKillRegState(Op0IsKill))
3892         .addImm(AArch64::sub_32);
3893     Op0 = TmpReg;
3894     Op0IsKill = true;
3895   }
3896   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
3897 }
3898
3899 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
3900                                      bool IsZExt) {
3901   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
3902
3903   // FastISel does not have plumbing to deal with extensions where the SrcVT or
3904   // DestVT are odd things, so test to make sure that they are both types we can
3905   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
3906   // bail out to SelectionDAG.
3907   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
3908        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
3909       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
3910        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
3911     return 0;
3912
3913   unsigned Opc;
3914   unsigned Imm = 0;
3915
3916   switch (SrcVT.SimpleTy) {
3917   default:
3918     return 0;
3919   case MVT::i1:
3920     return emiti1Ext(SrcReg, DestVT, IsZExt);
3921   case MVT::i8:
3922     if (DestVT == MVT::i64)
3923       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3924     else
3925       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
3926     Imm = 7;
3927     break;
3928   case MVT::i16:
3929     if (DestVT == MVT::i64)
3930       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3931     else
3932       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
3933     Imm = 15;
3934     break;
3935   case MVT::i32:
3936     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
3937     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
3938     Imm = 31;
3939     break;
3940   }
3941
3942   // Handle i8 and i16 as i32.
3943   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3944     DestVT = MVT::i32;
3945   else if (DestVT == MVT::i64) {
3946     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3947     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3948             TII.get(AArch64::SUBREG_TO_REG), Src64)
3949         .addImm(0)
3950         .addReg(SrcReg)
3951         .addImm(AArch64::sub_32);
3952     SrcReg = Src64;
3953   }
3954
3955   const TargetRegisterClass *RC =
3956       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3957   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
3958 }
3959
3960 bool AArch64FastISel::selectIntExt(const Instruction *I) {
3961   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
3962          "Unexpected integer extend instruction.");
3963   MVT RetVT;
3964   MVT SrcVT;
3965   if (!isTypeSupported(I->getType(), RetVT))
3966     return false;
3967
3968   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
3969     return false;
3970
3971   unsigned SrcReg = getRegForValue(I->getOperand(0));
3972   if (!SrcReg)
3973     return false;
3974   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
3975
3976   // The load instruction selection code handles the sign-/zero-extension.
3977   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0))) {
3978     if (LI->hasOneUse()) {
3979       updateValueMap(I, SrcReg);
3980       return true;
3981     }
3982   }
3983
3984   bool IsZExt = isa<ZExtInst>(I);
3985   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
3986     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
3987       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
3988         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
3989         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3990                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
3991             .addImm(0)
3992             .addReg(SrcReg, getKillRegState(SrcIsKill))
3993             .addImm(AArch64::sub_32);
3994         SrcReg = ResultReg;
3995       }
3996       updateValueMap(I, SrcReg);
3997       return true;
3998     }
3999   }
4000
4001   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4002   if (!ResultReg)
4003     return false;
4004
4005   updateValueMap(I, ResultReg);
4006   return true;
4007 }
4008
4009 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4010   EVT DestEVT = TLI.getValueType(I->getType(), true);
4011   if (!DestEVT.isSimple())
4012     return false;
4013
4014   MVT DestVT = DestEVT.getSimpleVT();
4015   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4016     return false;
4017
4018   unsigned DivOpc;
4019   bool Is64bit = (DestVT == MVT::i64);
4020   switch (ISDOpcode) {
4021   default:
4022     return false;
4023   case ISD::SREM:
4024     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4025     break;
4026   case ISD::UREM:
4027     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4028     break;
4029   }
4030   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4031   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4032   if (!Src0Reg)
4033     return false;
4034   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4035
4036   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4037   if (!Src1Reg)
4038     return false;
4039   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4040
4041   const TargetRegisterClass *RC =
4042       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4043   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4044                                      Src1Reg, /*IsKill=*/false);
4045   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4046   // The remainder is computed as numerator - (quotient * denominator) using the
4047   // MSUB instruction.
4048   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4049                                         Src1Reg, Src1IsKill, Src0Reg,
4050                                         Src0IsKill);
4051   updateValueMap(I, ResultReg);
4052   return true;
4053 }
4054
4055 bool AArch64FastISel::selectMul(const Instruction *I) {
4056   MVT VT;
4057   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4058     return false;
4059
4060   if (VT.isVector())
4061     return selectBinaryOp(I, ISD::MUL);
4062
4063   const Value *Src0 = I->getOperand(0);
4064   const Value *Src1 = I->getOperand(1);
4065   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4066     if (C->getValue().isPowerOf2())
4067       std::swap(Src0, Src1);
4068
4069   // Try to simplify to a shift instruction.
4070   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4071     if (C->getValue().isPowerOf2()) {
4072       uint64_t ShiftVal = C->getValue().logBase2();
4073       MVT SrcVT = VT;
4074       bool IsZExt = true;
4075       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4076         if (!isIntExtFree(ZExt)) {
4077           MVT VT;
4078           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4079             SrcVT = VT;
4080             IsZExt = true;
4081             Src0 = ZExt->getOperand(0);
4082           }
4083         }
4084       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4085         if (!isIntExtFree(SExt)) {
4086           MVT VT;
4087           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4088             SrcVT = VT;
4089             IsZExt = false;
4090             Src0 = SExt->getOperand(0);
4091           }
4092         }
4093       }
4094
4095       unsigned Src0Reg = getRegForValue(Src0);
4096       if (!Src0Reg)
4097         return false;
4098       bool Src0IsKill = hasTrivialKill(Src0);
4099
4100       unsigned ResultReg =
4101           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4102
4103       if (ResultReg) {
4104         updateValueMap(I, ResultReg);
4105         return true;
4106       }
4107     }
4108
4109   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4110   if (!Src0Reg)
4111     return false;
4112   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4113
4114   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4115   if (!Src1Reg)
4116     return false;
4117   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4118
4119   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4120
4121   if (!ResultReg)
4122     return false;
4123
4124   updateValueMap(I, ResultReg);
4125   return true;
4126 }
4127
4128 bool AArch64FastISel::selectShift(const Instruction *I) {
4129   MVT RetVT;
4130   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4131     return false;
4132
4133   if (RetVT.isVector())
4134     return selectOperator(I, I->getOpcode());
4135
4136   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4137     unsigned ResultReg = 0;
4138     uint64_t ShiftVal = C->getZExtValue();
4139     MVT SrcVT = RetVT;
4140     bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
4141     const Value *Op0 = I->getOperand(0);
4142     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4143       if (!isIntExtFree(ZExt)) {
4144         MVT TmpVT;
4145         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4146           SrcVT = TmpVT;
4147           IsZExt = true;
4148           Op0 = ZExt->getOperand(0);
4149         }
4150       }
4151     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4152       if (!isIntExtFree(SExt)) {
4153         MVT TmpVT;
4154         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4155           SrcVT = TmpVT;
4156           IsZExt = false;
4157           Op0 = SExt->getOperand(0);
4158         }
4159       }
4160     }
4161
4162     unsigned Op0Reg = getRegForValue(Op0);
4163     if (!Op0Reg)
4164       return false;
4165     bool Op0IsKill = hasTrivialKill(Op0);
4166
4167     switch (I->getOpcode()) {
4168     default: llvm_unreachable("Unexpected instruction.");
4169     case Instruction::Shl:
4170       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4171       break;
4172     case Instruction::AShr:
4173       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4174       break;
4175     case Instruction::LShr:
4176       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4177       break;
4178     }
4179     if (!ResultReg)
4180       return false;
4181
4182     updateValueMap(I, ResultReg);
4183     return true;
4184   }
4185
4186   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4187   if (!Op0Reg)
4188     return false;
4189   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4190
4191   unsigned Op1Reg = getRegForValue(I->getOperand(1));
4192   if (!Op1Reg)
4193     return false;
4194   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4195
4196   unsigned ResultReg = 0;
4197   switch (I->getOpcode()) {
4198   default: llvm_unreachable("Unexpected instruction.");
4199   case Instruction::Shl:
4200     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4201     break;
4202   case Instruction::AShr:
4203     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4204     break;
4205   case Instruction::LShr:
4206     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4207     break;
4208   }
4209
4210   if (!ResultReg)
4211     return false;
4212
4213   updateValueMap(I, ResultReg);
4214   return true;
4215 }
4216
4217 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4218   MVT RetVT, SrcVT;
4219
4220   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4221     return false;
4222   if (!isTypeLegal(I->getType(), RetVT))
4223     return false;
4224
4225   unsigned Opc;
4226   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4227     Opc = AArch64::FMOVWSr;
4228   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4229     Opc = AArch64::FMOVXDr;
4230   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4231     Opc = AArch64::FMOVSWr;
4232   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4233     Opc = AArch64::FMOVDXr;
4234   else
4235     return false;
4236
4237   const TargetRegisterClass *RC = nullptr;
4238   switch (RetVT.SimpleTy) {
4239   default: llvm_unreachable("Unexpected value type.");
4240   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4241   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4242   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4243   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4244   }
4245   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4246   if (!Op0Reg)
4247     return false;
4248   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4249   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4250
4251   if (!ResultReg)
4252     return false;
4253
4254   updateValueMap(I, ResultReg);
4255   return true;
4256 }
4257
4258 bool AArch64FastISel::selectFRem(const Instruction *I) {
4259   MVT RetVT;
4260   if (!isTypeLegal(I->getType(), RetVT))
4261     return false;
4262
4263   RTLIB::Libcall LC;
4264   switch (RetVT.SimpleTy) {
4265   default:
4266     return false;
4267   case MVT::f32:
4268     LC = RTLIB::REM_F32;
4269     break;
4270   case MVT::f64:
4271     LC = RTLIB::REM_F64;
4272     break;
4273   }
4274
4275   ArgListTy Args;
4276   Args.reserve(I->getNumOperands());
4277
4278   // Populate the argument list.
4279   for (auto &Arg : I->operands()) {
4280     ArgListEntry Entry;
4281     Entry.Val = Arg;
4282     Entry.Ty = Arg->getType();
4283     Args.push_back(Entry);
4284   }
4285
4286   CallLoweringInfo CLI;
4287   CLI.setCallee(TLI.getLibcallCallingConv(LC), I->getType(),
4288                 TLI.getLibcallName(LC), std::move(Args));
4289   if (!lowerCallTo(CLI))
4290     return false;
4291   updateValueMap(I, CLI.ResultReg);
4292   return true;
4293 }
4294
4295 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4296   MVT VT;
4297   if (!isTypeLegal(I->getType(), VT))
4298     return false;
4299
4300   if (!isa<ConstantInt>(I->getOperand(1)))
4301     return selectBinaryOp(I, ISD::SDIV);
4302
4303   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4304   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4305       !(C.isPowerOf2() || (-C).isPowerOf2()))
4306     return selectBinaryOp(I, ISD::SDIV);
4307
4308   unsigned Lg2 = C.countTrailingZeros();
4309   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4310   if (!Src0Reg)
4311     return false;
4312   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4313
4314   if (cast<BinaryOperator>(I)->isExact()) {
4315     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4316     if (!ResultReg)
4317       return false;
4318     updateValueMap(I, ResultReg);
4319     return true;
4320   }
4321
4322   unsigned Pow2MinusOne = (1 << Lg2) - 1;
4323   unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg,
4324                                   /*IsKill=*/false, Pow2MinusOne);
4325   if (!AddReg)
4326     return false;
4327
4328   // (Src0 < 0) ? Pow2 - 1 : 0;
4329   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4330     return false;
4331
4332   unsigned SelectOpc;
4333   const TargetRegisterClass *RC;
4334   if (VT == MVT::i64) {
4335     SelectOpc = AArch64::CSELXr;
4336     RC = &AArch64::GPR64RegClass;
4337   } else {
4338     SelectOpc = AArch64::CSELWr;
4339     RC = &AArch64::GPR32RegClass;
4340   }
4341   unsigned SelectReg =
4342       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4343                        Src0IsKill, AArch64CC::LT);
4344   if (!SelectReg)
4345     return false;
4346
4347   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4348   // negate the result.
4349   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4350   unsigned ResultReg;
4351   if (C.isNegative())
4352     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4353                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4354   else
4355     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4356
4357   if (!ResultReg)
4358     return false;
4359
4360   updateValueMap(I, ResultReg);
4361   return true;
4362 }
4363
4364 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
4365   switch (I->getOpcode()) {
4366   default:
4367     break;
4368   case Instruction::Add:
4369   case Instruction::Sub:
4370     return selectAddSub(I);
4371   case Instruction::Mul:
4372     return selectMul(I);
4373   case Instruction::SDiv:
4374     return selectSDiv(I);
4375   case Instruction::SRem:
4376     if (!selectBinaryOp(I, ISD::SREM))
4377       return selectRem(I, ISD::SREM);
4378     return true;
4379   case Instruction::URem:
4380     if (!selectBinaryOp(I, ISD::UREM))
4381       return selectRem(I, ISD::UREM);
4382     return true;
4383   case Instruction::Shl:
4384   case Instruction::LShr:
4385   case Instruction::AShr:
4386     return selectShift(I);
4387   case Instruction::And:
4388   case Instruction::Or:
4389   case Instruction::Xor:
4390     return selectLogicalOp(I);
4391   case Instruction::Br:
4392     return selectBranch(I);
4393   case Instruction::IndirectBr:
4394     return selectIndirectBr(I);
4395   case Instruction::BitCast:
4396     if (!FastISel::selectBitCast(I))
4397       return selectBitCast(I);
4398     return true;
4399   case Instruction::FPToSI:
4400     if (!selectCast(I, ISD::FP_TO_SINT))
4401       return selectFPToInt(I, /*Signed=*/true);
4402     return true;
4403   case Instruction::FPToUI:
4404     return selectFPToInt(I, /*Signed=*/false);
4405   case Instruction::ZExt:
4406   case Instruction::SExt:
4407     return selectIntExt(I);
4408   case Instruction::Trunc:
4409     if (!selectCast(I, ISD::TRUNCATE))
4410       return selectTrunc(I);
4411     return true;
4412   case Instruction::FPExt:
4413     return selectFPExt(I);
4414   case Instruction::FPTrunc:
4415     return selectFPTrunc(I);
4416   case Instruction::SIToFP:
4417     if (!selectCast(I, ISD::SINT_TO_FP))
4418       return selectIntToFP(I, /*Signed=*/true);
4419     return true;
4420   case Instruction::UIToFP:
4421     return selectIntToFP(I, /*Signed=*/false);
4422   case Instruction::Load:
4423     return selectLoad(I);
4424   case Instruction::Store:
4425     return selectStore(I);
4426   case Instruction::FCmp:
4427   case Instruction::ICmp:
4428     return selectCmp(I);
4429   case Instruction::Select:
4430     return selectSelect(I);
4431   case Instruction::Ret:
4432     return selectRet(I);
4433   case Instruction::FRem:
4434     return selectFRem(I);
4435   }
4436
4437   // fall-back to target-independent instruction selection.
4438   return selectOperator(I, I->getOpcode());
4439   // Silence warnings.
4440   (void)&CC_AArch64_DarwinPCS_VarArg;
4441 }
4442
4443 namespace llvm {
4444 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
4445                                         const TargetLibraryInfo *LibInfo) {
4446   return new AArch64FastISel(FuncInfo, LibInfo);
4447 }
4448 }