lib/Target/AArch64/AArch64FastISel.cpp

   1 //===-- AArch6464FastISel.cpp - AArch64 FastISel implementation -----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the AArch64-specific support for the FastISel class. Some
  11 // of the target-specific code is generated by tablegen in the file
  12 // AArch64GenFastISel.inc, which is #included here.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "AArch64.h"
  17 #include "AArch64Subtarget.h"
  18 #include "AArch64TargetMachine.h"
  19 #include "MCTargetDesc/AArch64AddressingModes.h"
  20 #include "llvm/Analysis/BranchProbabilityInfo.h"
  21 #include "llvm/CodeGen/CallingConvLower.h"
  22 #include "llvm/CodeGen/FastISel.h"
  23 #include "llvm/CodeGen/FunctionLoweringInfo.h"
  24 #include "llvm/CodeGen/MachineConstantPool.h"
  25 #include "llvm/CodeGen/MachineFrameInfo.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/IR/CallingConv.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/DerivedTypes.h"
  31 #include "llvm/IR/Function.h"
  32 #include "llvm/IR/GetElementPtrTypeIterator.h"
  33 #include "llvm/IR/GlobalAlias.h"
  34 #include "llvm/IR/GlobalVariable.h"
  35 #include "llvm/IR/Instructions.h"
  36 #include "llvm/IR/IntrinsicInst.h"
  37 #include "llvm/IR/Operator.h"
  38 #include "llvm/Support/CommandLine.h"
  39 using namespace llvm;
  40
  41 namespace {
  42
  43 class AArch64FastISel : public FastISel {
  44   class Address {
  45   public:
  46     typedef enum {
  47       RegBase,
  48       FrameIndexBase
  49     } BaseKind;
  50
  51   private:
  52     BaseKind Kind;
  53     AArch64_AM::ShiftExtendType ExtType;
  54     union {
  55       unsigned Reg;
  56       int FI;
  57     } Base;
  58     unsigned OffsetReg;
  59     unsigned Shift;
  60     int64_t Offset;
  61     const GlobalValue *GV;
  62
  63   public:
  64     Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
  65       OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
  66     void setKind(BaseKind K) { Kind = K; }
  67     BaseKind getKind() const { return Kind; }
  68     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
  69     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
  70     bool isRegBase() const { return Kind == RegBase; }
  71     bool isFIBase() const { return Kind == FrameIndexBase; }
  72     void setReg(unsigned Reg) {
  73       assert(isRegBase() && "Invalid base register access!");
  74       Base.Reg = Reg;
  75     }
  76     unsigned getReg() const {
  77       assert(isRegBase() && "Invalid base register access!");
  78       return Base.Reg;
  79     }
  80     void setOffsetReg(unsigned Reg) {
  81       assert(isRegBase() && "Invalid offset register access!");
  82       OffsetReg = Reg;
  83     }
  84     unsigned getOffsetReg() const {
  85       assert(isRegBase() && "Invalid offset register access!");
  86       return OffsetReg;
  87     }
  88     void setFI(unsigned FI) {
  89       assert(isFIBase() && "Invalid base frame index  access!");
  90       Base.FI = FI;
  91     }
  92     unsigned getFI() const {
  93       assert(isFIBase() && "Invalid base frame index access!");
  94       return Base.FI;
  95     }
  96     void setOffset(int64_t O) { Offset = O; }
  97     int64_t getOffset() { return Offset; }
  98     void setShift(unsigned S) { Shift = S; }
  99     unsigned getShift() { return Shift; }
 100
 101     void setGlobalValue(const GlobalValue *G) { GV = G; }
 102     const GlobalValue *getGlobalValue() { return GV; }
 103   };
 104
 105   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
 106   /// make the right decision when generating code for different targets.
 107   const AArch64Subtarget *Subtarget;
 108   LLVMContext *Context;
 109
 110   bool FastLowerArguments() override;
 111   bool FastLowerCall(CallLoweringInfo &CLI) override;
 112   bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
 113
 114 private:
 115   // Selection routines.
 116   bool SelectLoad(const Instruction *I);
 117   bool SelectStore(const Instruction *I);
 118   bool SelectBranch(const Instruction *I);
 119   bool SelectIndirectBr(const Instruction *I);
 120   bool SelectCmp(const Instruction *I);
 121   bool SelectSelect(const Instruction *I);
 122   bool SelectFPExt(const Instruction *I);
 123   bool SelectFPTrunc(const Instruction *I);
 124   bool SelectFPToInt(const Instruction *I, bool Signed);
 125   bool SelectIntToFP(const Instruction *I, bool Signed);
 126   bool SelectRem(const Instruction *I, unsigned ISDOpcode);
 127   bool SelectRet(const Instruction *I);
 128   bool SelectTrunc(const Instruction *I);
 129   bool SelectIntExt(const Instruction *I);
 130   bool SelectMul(const Instruction *I);
 131   bool SelectShift(const Instruction *I);
 132   bool SelectBitCast(const Instruction *I);
 133
 134   // Utility helper routines.
 135   bool isTypeLegal(Type *Ty, MVT &VT);
 136   bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
 137   bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
 138   bool ComputeCallAddress(const Value *V, Address &Addr);
 139   bool SimplifyAddress(Address &Addr, MVT VT);
 140   void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
 141                             unsigned Flags, unsigned ScaleFactor,
 142                             MachineMemOperand *MMO);
 143   bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
 144   bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
 145                           unsigned Alignment);
 146   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
 147                          const Value *Cond);
 148
 149   // Emit helper routines.
 150   unsigned emitAddsSubs(bool UseAdds, MVT RetVT, const Value *LHS,
 151                         const Value *RHS, bool IsZExt = false,
 152                         bool WantResult = true);
 153   unsigned emitAddsSubs_rr(bool UseAdds, MVT RetVT, unsigned LHSReg,
 154                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 155                            bool WantResult = true);
 156   unsigned emitAddsSubs_ri(bool UseAdds, MVT RetVT, unsigned LHSReg,
 157                            bool LHSIsKill, uint64_t Imm,
 158                            bool WantResult = true);
 159   unsigned emitAddsSubs_rs(bool UseAdds, MVT RetVT, unsigned LHSReg,
 160                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 161                            AArch64_AM::ShiftExtendType ShiftType,
 162                            uint64_t ShiftImm, bool WantResult = true);
 163   unsigned emitAddsSubs_rx(bool UseAdds, MVT RetVT, unsigned LHSReg,
 164                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
 165                            AArch64_AM::ShiftExtendType ExtType,
 166                            uint64_t ShiftImm, bool WantResult = true);
 167
 168   // Emit functions.
 169   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
 170   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
 171   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 172   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
 173   bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
 174                 MachineMemOperand *MMO = nullptr);
 175   bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
 176                  MachineMemOperand *MMO = nullptr);
 177   unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
 178   unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
 179   unsigned emitAdds(MVT RetVT, const Value *LHS, const Value *RHS,
 180                     bool IsZExt = false, bool WantResult = true);
 181   unsigned emitSubs(MVT RetVT, const Value *LHS, const Value *RHS,
 182                     bool IsZExt = false, bool WantResult = true);
 183   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 184                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
 185   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
 186                        unsigned RHSReg, bool RHSIsKill,
 187                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
 188                        bool WantResult = true);
 189   unsigned emitAND_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
 190   unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 191                        unsigned Op1, bool Op1IsKill);
 192   unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 193                          unsigned Op1, bool Op1IsKill);
 194   unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
 195                          unsigned Op1, bool Op1IsKill);
 196   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 197                       unsigned Op1Reg, bool Op1IsKill);
 198   unsigned emitLSL_ri(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, uint64_t Imm);
 199   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 200                       unsigned Op1Reg, bool Op1IsKill);
 201   unsigned emitLSR_ri(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, uint64_t Imm);
 202   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 203                       unsigned Op1Reg, bool Op1IsKill);
 204   unsigned emitASR_ri(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, uint64_t Imm);
 205
 206   unsigned AArch64MaterializeInt(const ConstantInt *CI, MVT VT);
 207   unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
 208   unsigned AArch64MaterializeGV(const GlobalValue *GV);
 209
 210   // Call handling routines.
 211 private:
 212   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
 213   bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
 214                        unsigned &NumBytes);
 215   bool FinishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
 216
 217 public:
 218   // Backend specific FastISel code.
 219   unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
 220   unsigned TargetMaterializeConstant(const Constant *C) override;
 221
 222   explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
 223                          const TargetLibraryInfo *libInfo)
 224       : FastISel(funcInfo, libInfo) {
 225     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
 226     Context = &funcInfo.Fn->getContext();
 227   }
 228
 229   bool TargetSelectInstruction(const Instruction *I) override;
 230
 231 #include "AArch64GenFastISel.inc"
 232 };
 233
 234 } // end anonymous namespace
 235
 236 #include "AArch64GenCallingConv.inc"
 237
 238 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
 239   if (CC == CallingConv::WebKit_JS)
 240     return CC_AArch64_WebKit_JS;
 241   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 242 }
 243
 244 unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
 245   assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
 246          "Alloca should always return a pointer.");
 247
 248   // Don't handle dynamic allocas.
 249   if (!FuncInfo.StaticAllocaMap.count(AI))
 250     return 0;
 251
 252   DenseMap<const AllocaInst *, int>::iterator SI =
 253       FuncInfo.StaticAllocaMap.find(AI);
 254
 255   if (SI != FuncInfo.StaticAllocaMap.end()) {
 256     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 257     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 258             ResultReg)
 259         .addFrameIndex(SI->second)
 260         .addImm(0)
 261         .addImm(0);
 262     return ResultReg;
 263   }
 264
 265   return 0;
 266 }
 267
 268 unsigned AArch64FastISel::AArch64MaterializeInt(const ConstantInt *CI, MVT VT) {
 269   if (VT > MVT::i64)
 270     return 0;
 271
 272   if (!CI->isZero())
 273     return FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 274
 275   // Create a copy from the zero register to materialize a "0" value.
 276   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
 277                                                    : &AArch64::GPR32RegClass;
 278   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 279   unsigned ResultReg = createResultReg(RC);
 280   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
 281           ResultReg).addReg(ZeroReg, getKillRegState(true));
 282   return ResultReg;
 283 }
 284
 285 unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
 286   if (VT != MVT::f32 && VT != MVT::f64)
 287     return 0;
 288
 289   const APFloat Val = CFP->getValueAPF();
 290   bool Is64Bit = (VT == MVT::f64);
 291
 292   // This checks to see if we can use FMOV instructions to materialize
 293   // a constant, otherwise we have to materialize via the constant pool.
 294   if (TLI.isFPImmLegal(Val, VT)) {
 295     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 296     // Positive zero (+0.0) has to be materialized with a fmov from the zero
 297     // register, because the immediate version of fmov cannot encode zero.
 298     if (Val.isPosZero()) {
 299       unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
 300       unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
 301       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 302         .addReg(ZReg, getKillRegState(true));
 303       return ResultReg;
 304     }
 305     int Imm = Is64Bit ? AArch64_AM::getFP64Imm(Val)
 306                       : AArch64_AM::getFP32Imm(Val);
 307     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
 308     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 309       .addImm(Imm);
 310     return ResultReg;
 311   }
 312
 313   // Materialize via constant pool.  MachineConstantPool wants an explicit
 314   // alignment.
 315   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
 316   if (Align == 0)
 317     Align = DL.getTypeAllocSize(CFP->getType());
 318
 319   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
 320   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 321   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 322           ADRPReg)
 323     .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
 324
 325   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
 326   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
 327   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
 328     .addReg(ADRPReg)
 329     .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
 330   return ResultReg;
 331 }
 332
 333 unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
 334   // We can't handle thread-local variables quickly yet.
 335   if (GV->isThreadLocal())
 336     return 0;
 337
 338   // MachO still uses GOT for large code-model accesses, but ELF requires
 339   // movz/movk sequences, which FastISel doesn't handle yet.
 340   if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
 341     return 0;
 342
 343   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
 344
 345   EVT DestEVT = TLI.getValueType(GV->getType(), true);
 346   if (!DestEVT.isSimple())
 347     return 0;
 348
 349   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 350   unsigned ResultReg;
 351
 352   if (OpFlags & AArch64II::MO_GOT) {
 353     // ADRP + LDRX
 354     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 355             ADRPReg)
 356       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
 357
 358     ResultReg = createResultReg(&AArch64::GPR64RegClass);
 359     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
 360             ResultReg)
 361       .addReg(ADRPReg)
 362       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
 363                         AArch64II::MO_NC);
 364   } else {
 365     // ADRP + ADDX
 366     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
 367             ADRPReg)
 368       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
 369
 370     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 371     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 372             ResultReg)
 373       .addReg(ADRPReg)
 374       .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
 375       .addImm(0);
 376   }
 377   return ResultReg;
 378 }
 379
 380 unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
 381   EVT CEVT = TLI.getValueType(C->getType(), true);
 382
 383   // Only handle simple types.
 384   if (!CEVT.isSimple())
 385     return 0;
 386   MVT VT = CEVT.getSimpleVT();
 387
 388   if (const auto *CI = dyn_cast<ConstantInt>(C))
 389     return AArch64MaterializeInt(CI, VT);
 390   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
 391     return AArch64MaterializeFP(CFP, VT);
 392   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
 393     return AArch64MaterializeGV(GV);
 394
 395   return 0;
 396 }
 397
 398 // Computes the address to get to an object.
 399 bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
 400 {
 401   const User *U = nullptr;
 402   unsigned Opcode = Instruction::UserOp1;
 403   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
 404     // Don't walk into other basic blocks unless the object is an alloca from
 405     // another block, otherwise it may not have a virtual register assigned.
 406     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
 407         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
 408       Opcode = I->getOpcode();
 409       U = I;
 410     }
 411   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
 412     Opcode = C->getOpcode();
 413     U = C;
 414   }
 415
 416   if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
 417     if (Ty->getAddressSpace() > 255)
 418       // Fast instruction selection doesn't support the special
 419       // address spaces.
 420       return false;
 421
 422   switch (Opcode) {
 423   default:
 424     break;
 425   case Instruction::BitCast: {
 426     // Look through bitcasts.
 427     return ComputeAddress(U->getOperand(0), Addr, Ty);
 428   }
 429   case Instruction::IntToPtr: {
 430     // Look past no-op inttoptrs.
 431     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 432       return ComputeAddress(U->getOperand(0), Addr, Ty);
 433     break;
 434   }
 435   case Instruction::PtrToInt: {
 436     // Look past no-op ptrtoints.
 437     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
 438       return ComputeAddress(U->getOperand(0), Addr, Ty);
 439     break;
 440   }
 441   case Instruction::GetElementPtr: {
 442     Address SavedAddr = Addr;
 443     uint64_t TmpOffset = Addr.getOffset();
 444
 445     // Iterate through the GEP folding the constants into offsets where
 446     // we can.
 447     gep_type_iterator GTI = gep_type_begin(U);
 448     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
 449          ++i, ++GTI) {
 450       const Value *Op = *i;
 451       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
 452         const StructLayout *SL = DL.getStructLayout(STy);
 453         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
 454         TmpOffset += SL->getElementOffset(Idx);
 455       } else {
 456         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
 457         for (;;) {
 458           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
 459             // Constant-offset addressing.
 460             TmpOffset += CI->getSExtValue() * S;
 461             break;
 462           }
 463           if (canFoldAddIntoGEP(U, Op)) {
 464             // A compatible add with a constant operand. Fold the constant.
 465             ConstantInt *CI =
 466                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
 467             TmpOffset += CI->getSExtValue() * S;
 468             // Iterate on the other operand.
 469             Op = cast<AddOperator>(Op)->getOperand(0);
 470             continue;
 471           }
 472           // Unsupported
 473           goto unsupported_gep;
 474         }
 475       }
 476     }
 477
 478     // Try to grab the base operand now.
 479     Addr.setOffset(TmpOffset);
 480     if (ComputeAddress(U->getOperand(0), Addr, Ty))
 481       return true;
 482
 483     // We failed, restore everything and try the other options.
 484     Addr = SavedAddr;
 485
 486   unsupported_gep:
 487     break;
 488   }
 489   case Instruction::Alloca: {
 490     const AllocaInst *AI = cast<AllocaInst>(Obj);
 491     DenseMap<const AllocaInst *, int>::iterator SI =
 492         FuncInfo.StaticAllocaMap.find(AI);
 493     if (SI != FuncInfo.StaticAllocaMap.end()) {
 494       Addr.setKind(Address::FrameIndexBase);
 495       Addr.setFI(SI->second);
 496       return true;
 497     }
 498     break;
 499   }
 500   case Instruction::Add: {
 501     // Adds of constants are common and easy enough.
 502     const Value *LHS = U->getOperand(0);
 503     const Value *RHS = U->getOperand(1);
 504
 505     if (isa<ConstantInt>(LHS))
 506       std::swap(LHS, RHS);
 507
 508     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
 509       Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
 510       return ComputeAddress(LHS, Addr, Ty);
 511     }
 512
 513     Address Backup = Addr;
 514     if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
 515       return true;
 516     Addr = Backup;
 517
 518     break;
 519   }
 520   case Instruction::Shl:
 521     if (Addr.getOffsetReg())
 522       break;
 523
 524     if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
 525       unsigned Val = CI->getZExtValue();
 526       if (Val < 1 || Val > 3)
 527         break;
 528
 529       uint64_t NumBytes = 0;
 530       if (Ty && Ty->isSized()) {
 531         uint64_t NumBits = DL.getTypeSizeInBits(Ty);
 532         NumBytes = NumBits / 8;
 533         if (!isPowerOf2_64(NumBits))
 534           NumBytes = 0;
 535       }
 536
 537       if (NumBytes != (1ULL << Val))
 538         break;
 539
 540       Addr.setShift(Val);
 541       Addr.setExtendType(AArch64_AM::LSL);
 542
 543       if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
 544         if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
 545           U = I;
 546
 547       if (const auto *ZE = dyn_cast<ZExtInst>(U))
 548         if (ZE->getOperand(0)->getType()->isIntegerTy(32))
 549           Addr.setExtendType(AArch64_AM::UXTW);
 550
 551       if (const auto *SE = dyn_cast<SExtInst>(U))
 552         if (SE->getOperand(0)->getType()->isIntegerTy(32))
 553           Addr.setExtendType(AArch64_AM::SXTW);
 554
 555       unsigned Reg = getRegForValue(U->getOperand(0));
 556       if (!Reg)
 557         return false;
 558       Addr.setOffsetReg(Reg);
 559       return true;
 560     }
 561     break;
 562   }
 563
 564   if (Addr.getReg()) {
 565     if (!Addr.getOffsetReg()) {
 566       unsigned Reg = getRegForValue(Obj);
 567       if (!Reg)
 568         return false;
 569       Addr.setOffsetReg(Reg);
 570       return true;
 571     }
 572     return false;
 573   }
 574
 575   unsigned Reg = getRegForValue(Obj);
 576   if (!Reg)
 577     return false;
 578   Addr.setReg(Reg);
 579   return true;
 580 }
 581
 582 bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
 583   const User *U = nullptr;
 584   unsigned Opcode = Instruction::UserOp1;
 585   bool InMBB = true;
 586
 587   if (const auto *I = dyn_cast<Instruction>(V)) {
 588     Opcode = I->getOpcode();
 589     U = I;
 590     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
 591   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
 592     Opcode = C->getOpcode();
 593     U = C;
 594   }
 595
 596   switch (Opcode) {
 597   default: break;
 598   case Instruction::BitCast:
 599     // Look past bitcasts if its operand is in the same BB.
 600     if (InMBB)
 601       return ComputeCallAddress(U->getOperand(0), Addr);
 602     break;
 603   case Instruction::IntToPtr:
 604     // Look past no-op inttoptrs if its operand is in the same BB.
 605     if (InMBB &&
 606         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
 607       return ComputeCallAddress(U->getOperand(0), Addr);
 608     break;
 609   case Instruction::PtrToInt:
 610     // Look past no-op ptrtoints if its operand is in the same BB.
 611     if (InMBB &&
 612         TLI.getValueType(U->getType()) == TLI.getPointerTy())
 613       return ComputeCallAddress(U->getOperand(0), Addr);
 614     break;
 615   }
 616
 617   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
 618     Addr.setGlobalValue(GV);
 619     return true;
 620   }
 621
 622   // If all else fails, try to materialize the value in a register.
 623   if (!Addr.getGlobalValue()) {
 624     Addr.setReg(getRegForValue(V));
 625     return Addr.getReg() != 0;
 626   }
 627
 628   return false;
 629 }
 630
 631
 632 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
 633   EVT evt = TLI.getValueType(Ty, true);
 634
 635   // Only handle simple types.
 636   if (evt == MVT::Other || !evt.isSimple())
 637     return false;
 638   VT = evt.getSimpleVT();
 639
 640   // This is a legal type, but it's not something we handle in fast-isel.
 641   if (VT == MVT::f128)
 642     return false;
 643
 644   // Handle all other legal types, i.e. a register that will directly hold this
 645   // value.
 646   return TLI.isTypeLegal(VT);
 647 }
 648
 649 bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
 650   if (isTypeLegal(Ty, VT))
 651     return true;
 652
 653   // If this is a type than can be sign or zero-extended to a basic operation
 654   // go ahead and accept it now. For stores, this reflects truncation.
 655   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
 656     return true;
 657
 658   return false;
 659 }
 660
 661 bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
 662   unsigned ScaleFactor;
 663   switch (VT.SimpleTy) {
 664   default: return false;
 665   case MVT::i1:  // fall-through
 666   case MVT::i8:  ScaleFactor = 1; break;
 667   case MVT::i16: ScaleFactor = 2; break;
 668   case MVT::i32: // fall-through
 669   case MVT::f32: ScaleFactor = 4; break;
 670   case MVT::i64: // fall-through
 671   case MVT::f64: ScaleFactor = 8; break;
 672   }
 673
 674   bool ImmediateOffsetNeedsLowering = false;
 675   bool RegisterOffsetNeedsLowering = false;
 676   int64_t Offset = Addr.getOffset();
 677   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
 678     ImmediateOffsetNeedsLowering = true;
 679   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
 680            !isUInt<12>(Offset / ScaleFactor))
 681     ImmediateOffsetNeedsLowering = true;
 682
 683   // Cannot encode an offset register and an immediate offset in the same
 684   // instruction. Fold the immediate offset into the load/store instruction and
 685   // emit an additonal add to take care of the offset register.
 686   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
 687       Addr.getOffsetReg())
 688     RegisterOffsetNeedsLowering = true;
 689
 690   // If this is a stack pointer and the offset needs to be simplified then put
 691   // the alloca address into a register, set the base type back to register and
 692   // continue. This should almost never happen.
 693   if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
 694     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
 695     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
 696             ResultReg)
 697       .addFrameIndex(Addr.getFI())
 698       .addImm(0)
 699       .addImm(0);
 700     Addr.setKind(Address::RegBase);
 701     Addr.setReg(ResultReg);
 702   }
 703
 704   if (RegisterOffsetNeedsLowering) {
 705     unsigned ResultReg = 0;
 706     if (Addr.getReg())
 707       ResultReg = FastEmitInst_rri(AArch64::ADDXrs, &AArch64::GPR64RegClass,
 708                                    Addr.getReg(), /*TODO:IsKill=*/false,
 709                                    Addr.getOffsetReg(), /*TODO:IsKill=*/false,
 710                                    Addr.getShift());
 711     else
 712       ResultReg = emitLSL_ri(MVT::i64, Addr.getOffsetReg(), /*Op0IsKill=*/false,
 713                              Addr.getShift());
 714     if (!ResultReg)
 715       return false;
 716
 717     Addr.setReg(ResultReg);
 718     Addr.setOffsetReg(0);
 719     Addr.setShift(0);
 720   }
 721
 722   // Since the offset is too large for the load/store instruction get the
 723   // reg+offset into a register.
 724   if (ImmediateOffsetNeedsLowering) {
 725     unsigned ResultReg = 0;
 726     if (Addr.getReg())
 727       ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(),
 728                                /*IsKill=*/false, Offset, MVT::i64);
 729     else
 730       ResultReg = FastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
 731
 732     if (!ResultReg)
 733       return false;
 734     Addr.setReg(ResultReg);
 735     Addr.setOffset(0);
 736   }
 737   return true;
 738 }
 739
 740 void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
 741                                            const MachineInstrBuilder &MIB,
 742                                            unsigned Flags,
 743                                            unsigned ScaleFactor,
 744                                            MachineMemOperand *MMO) {
 745   int64_t Offset = Addr.getOffset() / ScaleFactor;
 746   // Frame base works a bit differently. Handle it separately.
 747   if (Addr.isFIBase()) {
 748     int FI = Addr.getFI();
 749     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
 750     // and alignment should be based on the VT.
 751     MMO = FuncInfo.MF->getMachineMemOperand(
 752       MachinePointerInfo::getFixedStack(FI, Offset), Flags,
 753       MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
 754     // Now add the rest of the operands.
 755     MIB.addFrameIndex(FI).addImm(Offset);
 756   } else {
 757     assert(Addr.isRegBase() && "Unexpected address kind.");
 758     const MCInstrDesc &II = MIB->getDesc();
 759     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
 760     Addr.setReg(
 761       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
 762     Addr.setOffsetReg(
 763       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
 764     if (Addr.getOffsetReg()) {
 765       assert(Addr.getOffset() == 0 && "Unexpected offset");
 766       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
 767                       Addr.getExtendType() == AArch64_AM::SXTX;
 768       MIB.addReg(Addr.getReg());
 769       MIB.addReg(Addr.getOffsetReg());
 770       MIB.addImm(IsSigned);
 771       MIB.addImm(Addr.getShift() != 0);
 772     } else {
 773       MIB.addReg(Addr.getReg());
 774       MIB.addImm(Offset);
 775     }
 776   }
 777
 778   if (MMO)
 779     MIB.addMemOperand(MMO);
 780 }
 781
 782 unsigned AArch64FastISel::emitAddsSubs(bool UseAdds, MVT RetVT,
 783                                        const Value *LHS, const Value *RHS,
 784                                        bool IsZExt, bool WantResult) {
 785   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
 786   bool NeedExtend = false;
 787   switch (RetVT.SimpleTy) {
 788   default:
 789     return 0;
 790   case MVT::i1:
 791     NeedExtend = true;
 792     break;
 793   case MVT::i8:
 794     NeedExtend = true;
 795     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
 796     break;
 797   case MVT::i16:
 798     NeedExtend = true;
 799     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
 800     break;
 801   case MVT::i32:  // fall-through
 802   case MVT::i64:
 803     break;
 804   }
 805   MVT SrcVT = RetVT;
 806   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
 807
 808   // Canonicalize immediates to the RHS first.
 809   if (UseAdds && isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
 810     std::swap(LHS, RHS);
 811
 812   // Canonicalize shift immediate to the RHS.
 813   if (UseAdds)
 814     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
 815       if (isa<ConstantInt>(SI->getOperand(1)))
 816         if (SI->getOpcode() == Instruction::Shl  ||
 817             SI->getOpcode() == Instruction::LShr ||
 818             SI->getOpcode() == Instruction::AShr   )
 819           std::swap(LHS, RHS);
 820
 821   unsigned LHSReg = getRegForValue(LHS);
 822   if (!LHSReg)
 823     return 0;
 824   bool LHSIsKill = hasTrivialKill(LHS);
 825
 826   if (NeedExtend)
 827     LHSReg = EmitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
 828
 829   unsigned ResultReg = 0;
 830   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
 831     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
 832     if (C->isNegative())
 833       ResultReg =
 834           emitAddsSubs_ri(!UseAdds, RetVT, LHSReg, LHSIsKill, -Imm, WantResult);
 835     else
 836       ResultReg =
 837           emitAddsSubs_ri(UseAdds, RetVT, LHSReg, LHSIsKill, Imm, WantResult);
 838   }
 839   if (ResultReg)
 840     return ResultReg;
 841
 842   // Only extend the RHS within the instruction if there is a valid extend type.
 843   if (ExtendType != AArch64_AM::InvalidShiftExtend) {
 844     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
 845       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
 846         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
 847           unsigned RHSReg = getRegForValue(SI->getOperand(0));
 848           if (!RHSReg)
 849             return 0;
 850           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
 851           return emitAddsSubs_rx(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg,
 852                                  RHSIsKill, ExtendType, C->getZExtValue(),
 853                                  WantResult);
 854         }
 855     unsigned RHSReg = getRegForValue(RHS);
 856     if (!RHSReg)
 857       return 0;
 858     bool RHSIsKill = hasTrivialKill(RHS);
 859     return emitAddsSubs_rx(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
 860                            ExtendType, 0, WantResult);
 861   }
 862
 863   // Check if the shift can be folded into the instruction.
 864   if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
 865     if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
 866       AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
 867       switch (SI->getOpcode()) {
 868       default: break;
 869       case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
 870       case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
 871       case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
 872       }
 873       uint64_t ShiftVal = C->getZExtValue();
 874       if (ShiftType != AArch64_AM::InvalidShiftExtend) {
 875         unsigned RHSReg = getRegForValue(SI->getOperand(0));
 876         if (!RHSReg)
 877           return 0;
 878         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
 879         return emitAddsSubs_rs(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg,
 880                                RHSIsKill, ShiftType, ShiftVal, WantResult);
 881       }
 882     }
 883   }
 884
 885   unsigned RHSReg = getRegForValue(RHS);
 886   if (!RHSReg)
 887     return 0;
 888   bool RHSIsKill = hasTrivialKill(RHS);
 889
 890   if (NeedExtend)
 891     RHSReg = EmitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
 892
 893   return emitAddsSubs_rr(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
 894                          WantResult);
 895 }
 896
 897 unsigned AArch64FastISel::emitAddsSubs_rr(bool UseAdds, MVT RetVT,
 898                                           unsigned LHSReg, bool LHSIsKill,
 899                                           unsigned RHSReg, bool RHSIsKill,
 900                                           bool WantResult) {
 901   assert(LHSReg && RHSReg && "Invalid register number.");
 902
 903   if (RetVT != MVT::i32 && RetVT != MVT::i64)
 904     return 0;
 905
 906   static const unsigned OpcTable[2][2] = {
 907     { AArch64::ADDSWrr, AArch64::ADDSXrr },
 908     { AArch64::SUBSWrr, AArch64::SUBSXrr }
 909   };
 910   unsigned Opc = OpcTable[!UseAdds][(RetVT == MVT::i64)];
 911   unsigned ResultReg;
 912   if (WantResult) {
 913     const TargetRegisterClass *RC =
 914         (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
 915     ResultReg = createResultReg(RC);
 916   } else
 917     ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 918
 919   const MCInstrDesc &II = TII.get(Opc);
 920   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
 921   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
 922   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
 923       .addReg(LHSReg, getKillRegState(LHSIsKill))
 924       .addReg(RHSReg, getKillRegState(RHSIsKill));
 925
 926   return ResultReg;
 927 }
 928
 929 unsigned AArch64FastISel::emitAddsSubs_ri(bool UseAdds, MVT RetVT,
 930                                           unsigned LHSReg, bool LHSIsKill,
 931                                           uint64_t Imm, bool WantResult) {
 932   assert(LHSReg && "Invalid register number.");
 933
 934   if (RetVT != MVT::i32 && RetVT != MVT::i64)
 935     return 0;
 936
 937   unsigned ShiftImm;
 938   if (isUInt<12>(Imm))
 939     ShiftImm = 0;
 940   else if ((Imm & 0xfff000) == Imm) {
 941     ShiftImm = 12;
 942     Imm >>= 12;
 943   } else
 944     return 0;
 945
 946   static const unsigned OpcTable[2][2] = {
 947     { AArch64::ADDSWri, AArch64::ADDSXri },
 948     { AArch64::SUBSWri, AArch64::SUBSXri }
 949   };
 950   unsigned Opc = OpcTable[!UseAdds][(RetVT == MVT::i64)];
 951   unsigned ResultReg;
 952   if (WantResult) {
 953     const TargetRegisterClass *RC =
 954         (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
 955     ResultReg = createResultReg(RC);
 956   } else
 957     ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 958
 959   const MCInstrDesc &II = TII.get(Opc);
 960   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
 961   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
 962       .addReg(LHSReg, getKillRegState(LHSIsKill))
 963       .addImm(Imm)
 964       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
 965
 966   return ResultReg;
 967 }
 968
 969 unsigned AArch64FastISel::emitAddsSubs_rs(bool UseAdds, MVT RetVT,
 970                                           unsigned LHSReg, bool LHSIsKill,
 971                                           unsigned RHSReg, bool RHSIsKill,
 972                                           AArch64_AM::ShiftExtendType ShiftType,
 973                                           uint64_t ShiftImm, bool WantResult) {
 974   assert(LHSReg && RHSReg && "Invalid register number.");
 975
 976   if (RetVT != MVT::i32 && RetVT != MVT::i64)
 977     return 0;
 978
 979   static const unsigned OpcTable[2][2] = {
 980     { AArch64::ADDSWrs, AArch64::ADDSXrs },
 981     { AArch64::SUBSWrs, AArch64::SUBSXrs }
 982   };
 983   unsigned Opc = OpcTable[!UseAdds][(RetVT == MVT::i64)];
 984   unsigned ResultReg;
 985   if (WantResult) {
 986     const TargetRegisterClass *RC =
 987         (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
 988     ResultReg = createResultReg(RC);
 989   } else
 990     ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
 991
 992   const MCInstrDesc &II = TII.get(Opc);
 993   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
 994   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
 995   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
 996       .addReg(LHSReg, getKillRegState(LHSIsKill))
 997       .addReg(RHSReg, getKillRegState(RHSIsKill))
 998       .addImm(getShifterImm(ShiftType, ShiftImm));
 999
1000   return ResultReg;
1001 }
1002
1003 unsigned AArch64FastISel::emitAddsSubs_rx(bool UseAdds, MVT RetVT,
1004                                           unsigned LHSReg, bool LHSIsKill,
1005                                           unsigned RHSReg, bool RHSIsKill,
1006                                           AArch64_AM::ShiftExtendType ExtType,
1007                                           uint64_t ShiftImm, bool WantResult) {
1008   assert(LHSReg && RHSReg && "Invalid register number.");
1009
1010   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1011     return 0;
1012
1013   static const unsigned OpcTable[2][2] = {
1014     { AArch64::ADDSWrx, AArch64::ADDSXrx },
1015     { AArch64::SUBSWrx, AArch64::SUBSXrx }
1016   };
1017   unsigned Opc = OpcTable[!UseAdds][(RetVT == MVT::i64)];
1018   unsigned ResultReg;
1019   if (WantResult) {
1020     const TargetRegisterClass *RC =
1021         (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1022     ResultReg = createResultReg(RC);
1023   } else
1024     ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
1025
1026   const MCInstrDesc &II = TII.get(Opc);
1027   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1028   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1029   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1030       .addReg(LHSReg, getKillRegState(LHSIsKill))
1031       .addReg(RHSReg, getKillRegState(RHSIsKill))
1032       .addImm(getArithExtendImm(ExtType, ShiftImm));
1033
1034   return ResultReg;
1035 }
1036
1037 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1038   Type *Ty = LHS->getType();
1039   EVT EVT = TLI.getValueType(Ty, true);
1040   if (!EVT.isSimple())
1041     return false;
1042   MVT VT = EVT.getSimpleVT();
1043
1044   switch (VT.SimpleTy) {
1045   default:
1046     return false;
1047   case MVT::i1:
1048   case MVT::i8:
1049   case MVT::i16:
1050   case MVT::i32:
1051   case MVT::i64:
1052     return emitICmp(VT, LHS, RHS, IsZExt);
1053   case MVT::f32:
1054   case MVT::f64:
1055     return emitFCmp(VT, LHS, RHS);
1056   }
1057 }
1058
1059 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1060                                bool IsZExt) {
1061   return emitSubs(RetVT, LHS, RHS, IsZExt, /*WantResult=*/false) != 0;
1062 }
1063
1064 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1065                                   uint64_t Imm) {
1066   return emitAddsSubs_ri(false, RetVT, LHSReg, LHSIsKill, Imm,
1067                          /*WantResult=*/false) != 0;
1068 }
1069
1070 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1071   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1072     return false;
1073
1074   // Check to see if the 2nd operand is a constant that we can encode directly
1075   // in the compare.
1076   bool UseImm = false;
1077   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1078     if (CFP->isZero() && !CFP->isNegative())
1079       UseImm = true;
1080
1081   unsigned LHSReg = getRegForValue(LHS);
1082   if (!LHSReg)
1083     return false;
1084   bool LHSIsKill = hasTrivialKill(LHS);
1085
1086   if (UseImm) {
1087     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1088     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1089         .addReg(LHSReg, getKillRegState(LHSIsKill));
1090     return true;
1091   }
1092
1093   unsigned RHSReg = getRegForValue(RHS);
1094   if (!RHSReg)
1095     return false;
1096   bool RHSIsKill = hasTrivialKill(RHS);
1097
1098   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1099   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1100       .addReg(LHSReg, getKillRegState(LHSIsKill))
1101       .addReg(RHSReg, getKillRegState(RHSIsKill));
1102   return true;
1103 }
1104
1105 unsigned AArch64FastISel::emitAdds(MVT RetVT, const Value *LHS,
1106                                    const Value *RHS, bool IsZExt,
1107                                    bool WantResult) {
1108   return emitAddsSubs(true, RetVT, LHS, RHS, IsZExt, WantResult);
1109 }
1110
1111 unsigned AArch64FastISel::emitSubs(MVT RetVT, const Value *LHS,
1112                                    const Value *RHS, bool IsZExt,
1113                                    bool WantResult) {
1114   return emitAddsSubs(false, RetVT, LHS, RHS, IsZExt, WantResult);
1115 }
1116
1117 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1118                                       bool LHSIsKill, unsigned RHSReg,
1119                                       bool RHSIsKill, bool WantResult) {
1120   return emitAddsSubs_rr(false, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1121                          WantResult);
1122 }
1123
1124 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1125                                       bool LHSIsKill, unsigned RHSReg,
1126                                       bool RHSIsKill,
1127                                       AArch64_AM::ShiftExtendType ShiftType,
1128                                       uint64_t ShiftImm, bool WantResult) {
1129   return emitAddsSubs_rs(false, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1130                          ShiftType, ShiftImm, WantResult);
1131 }
1132
1133 // FIXME: This should be eventually generated automatically by tblgen.
1134 unsigned AArch64FastISel::emitAND_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1135                                      uint64_t Imm) {
1136   const TargetRegisterClass *RC = nullptr;
1137   unsigned Opc = 0;
1138   unsigned RegSize = 0;
1139   switch (RetVT.SimpleTy) {
1140   default:
1141     return 0;
1142   case MVT::i32:
1143     Opc = AArch64::ANDWri;
1144     RC = &AArch64::GPR32spRegClass;
1145     RegSize = 32;
1146     break;
1147   case MVT::i64:
1148     Opc = AArch64::ANDXri;
1149     RC = &AArch64::GPR64spRegClass;
1150     RegSize = 64;
1151     break;
1152   }
1153
1154   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1155     return 0;
1156
1157   return FastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1158                          AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1159 }
1160
1161 bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
1162                                MachineMemOperand *MMO) {
1163   // Simplify this down to something we can handle.
1164   if (!SimplifyAddress(Addr, VT))
1165     return false;
1166
1167   unsigned ScaleFactor;
1168   switch (VT.SimpleTy) {
1169   default: llvm_unreachable("Unexpected value type.");
1170   case MVT::i1:  // fall-through
1171   case MVT::i8:  ScaleFactor = 1; break;
1172   case MVT::i16: ScaleFactor = 2; break;
1173   case MVT::i32: // fall-through
1174   case MVT::f32: ScaleFactor = 4; break;
1175   case MVT::i64: // fall-through
1176   case MVT::f64: ScaleFactor = 8; break;
1177   }
1178
1179   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1180   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1181   bool UseScaled = true;
1182   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1183     UseScaled = false;
1184     ScaleFactor = 1;
1185   }
1186
1187   static const unsigned OpcTable[4][6] = {
1188     { AArch64::LDURBBi,  AArch64::LDURHHi,  AArch64::LDURWi,  AArch64::LDURXi,
1189       AArch64::LDURSi,   AArch64::LDURDi },
1190     { AArch64::LDRBBui,  AArch64::LDRHHui,  AArch64::LDRWui,  AArch64::LDRXui,
1191       AArch64::LDRSui,   AArch64::LDRDui },
1192     { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
1193       AArch64::LDRSroX,  AArch64::LDRDroX },
1194     { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
1195       AArch64::LDRSroW,  AArch64::LDRDroW }
1196   };
1197
1198   unsigned Opc;
1199   const TargetRegisterClass *RC;
1200   bool VTIsi1 = false;
1201   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1202                       Addr.getOffsetReg();
1203   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1204   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1205       Addr.getExtendType() == AArch64_AM::SXTW)
1206     Idx++;
1207
1208   switch (VT.SimpleTy) {
1209   default: llvm_unreachable("Unexpected value type.");
1210   case MVT::i1:  VTIsi1 = true; // Intentional fall-through.
1211   case MVT::i8:  Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
1212   case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
1213   case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
1214   case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
1215   case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
1216   case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
1217   }
1218
1219   // Create the base instruction, then add the operands.
1220   ResultReg = createResultReg(RC);
1221   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1222                                     TII.get(Opc), ResultReg);
1223   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1224
1225   // Loading an i1 requires special handling.
1226   if (VTIsi1) {
1227     unsigned ANDReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1228     assert(ANDReg && "Unexpected AND instruction emission failure.");
1229     ResultReg = ANDReg;
1230   }
1231   return true;
1232 }
1233
1234 bool AArch64FastISel::SelectLoad(const Instruction *I) {
1235   MVT VT;
1236   // Verify we have a legal type before going any further.  Currently, we handle
1237   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1238   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1239   if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
1240     return false;
1241
1242   // See if we can handle this address.
1243   Address Addr;
1244   if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
1245     return false;
1246
1247   unsigned ResultReg;
1248   if (!EmitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
1249     return false;
1250
1251   UpdateValueMap(I, ResultReg);
1252   return true;
1253 }
1254
1255 bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
1256                                 MachineMemOperand *MMO) {
1257   // Simplify this down to something we can handle.
1258   if (!SimplifyAddress(Addr, VT))
1259     return false;
1260
1261   unsigned ScaleFactor;
1262   switch (VT.SimpleTy) {
1263   default: llvm_unreachable("Unexpected value type.");
1264   case MVT::i1:  // fall-through
1265   case MVT::i8:  ScaleFactor = 1; break;
1266   case MVT::i16: ScaleFactor = 2; break;
1267   case MVT::i32: // fall-through
1268   case MVT::f32: ScaleFactor = 4; break;
1269   case MVT::i64: // fall-through
1270   case MVT::f64: ScaleFactor = 8; break;
1271   }
1272
1273   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1274   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1275   bool UseScaled = true;
1276   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1277     UseScaled = false;
1278     ScaleFactor = 1;
1279   }
1280
1281
1282   static const unsigned OpcTable[4][6] = {
1283     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
1284       AArch64::STURSi,   AArch64::STURDi },
1285     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
1286       AArch64::STRSui,   AArch64::STRDui },
1287     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
1288       AArch64::STRSroX,  AArch64::STRDroX },
1289     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
1290       AArch64::STRSroW,  AArch64::STRDroW }
1291
1292   };
1293
1294   unsigned Opc;
1295   bool VTIsi1 = false;
1296   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1297                       Addr.getOffsetReg();
1298   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1299   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1300       Addr.getExtendType() == AArch64_AM::SXTW)
1301     Idx++;
1302
1303   switch (VT.SimpleTy) {
1304   default: llvm_unreachable("Unexpected value type.");
1305   case MVT::i1:  VTIsi1 = true;
1306   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
1307   case MVT::i16: Opc = OpcTable[Idx][1]; break;
1308   case MVT::i32: Opc = OpcTable[Idx][2]; break;
1309   case MVT::i64: Opc = OpcTable[Idx][3]; break;
1310   case MVT::f32: Opc = OpcTable[Idx][4]; break;
1311   case MVT::f64: Opc = OpcTable[Idx][5]; break;
1312   }
1313
1314   // Storing an i1 requires special handling.
1315   if (VTIsi1) {
1316     unsigned ANDReg = emitAND_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
1317     assert(ANDReg && "Unexpected AND instruction emission failure.");
1318     SrcReg = ANDReg;
1319   }
1320   // Create the base instruction, then add the operands.
1321   const MCInstrDesc &II = TII.get(Opc);
1322   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
1323   MachineInstrBuilder MIB =
1324       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
1325   AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
1326
1327   return true;
1328 }
1329
1330 bool AArch64FastISel::SelectStore(const Instruction *I) {
1331   MVT VT;
1332   Value *Op0 = I->getOperand(0);
1333   // Verify we have a legal type before going any further.  Currently, we handle
1334   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1335   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1336   if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
1337       cast<StoreInst>(I)->isAtomic())
1338     return false;
1339
1340   // Get the value to be stored into a register.
1341   unsigned SrcReg = getRegForValue(Op0);
1342   if (SrcReg == 0)
1343     return false;
1344
1345   // See if we can handle this address.
1346   Address Addr;
1347   if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
1348     return false;
1349
1350   if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
1351     return false;
1352   return true;
1353 }
1354
1355 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
1356   switch (Pred) {
1357   case CmpInst::FCMP_ONE:
1358   case CmpInst::FCMP_UEQ:
1359   default:
1360     // AL is our "false" for now. The other two need more compares.
1361     return AArch64CC::AL;
1362   case CmpInst::ICMP_EQ:
1363   case CmpInst::FCMP_OEQ:
1364     return AArch64CC::EQ;
1365   case CmpInst::ICMP_SGT:
1366   case CmpInst::FCMP_OGT:
1367     return AArch64CC::GT;
1368   case CmpInst::ICMP_SGE:
1369   case CmpInst::FCMP_OGE:
1370     return AArch64CC::GE;
1371   case CmpInst::ICMP_UGT:
1372   case CmpInst::FCMP_UGT:
1373     return AArch64CC::HI;
1374   case CmpInst::FCMP_OLT:
1375     return AArch64CC::MI;
1376   case CmpInst::ICMP_ULE:
1377   case CmpInst::FCMP_OLE:
1378     return AArch64CC::LS;
1379   case CmpInst::FCMP_ORD:
1380     return AArch64CC::VC;
1381   case CmpInst::FCMP_UNO:
1382     return AArch64CC::VS;
1383   case CmpInst::FCMP_UGE:
1384     return AArch64CC::PL;
1385   case CmpInst::ICMP_SLT:
1386   case CmpInst::FCMP_ULT:
1387     return AArch64CC::LT;
1388   case CmpInst::ICMP_SLE:
1389   case CmpInst::FCMP_ULE:
1390     return AArch64CC::LE;
1391   case CmpInst::FCMP_UNE:
1392   case CmpInst::ICMP_NE:
1393     return AArch64CC::NE;
1394   case CmpInst::ICMP_UGE:
1395     return AArch64CC::HS;
1396   case CmpInst::ICMP_ULT:
1397     return AArch64CC::LO;
1398   }
1399 }
1400
1401 bool AArch64FastISel::SelectBranch(const Instruction *I) {
1402   const BranchInst *BI = cast<BranchInst>(I);
1403   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
1404   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
1405
1406   AArch64CC::CondCode CC = AArch64CC::NE;
1407   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
1408     if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
1409       // We may not handle every CC for now.
1410       CC = getCompareCC(CI->getPredicate());
1411       if (CC == AArch64CC::AL)
1412         return false;
1413
1414       // Emit the cmp.
1415       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1416         return false;
1417
1418       // Emit the branch.
1419       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1420           .addImm(CC)
1421           .addMBB(TBB);
1422
1423       // Obtain the branch weight and add the TrueBB to the successor list.
1424       uint32_t BranchWeight = 0;
1425       if (FuncInfo.BPI)
1426         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1427                                                   TBB->getBasicBlock());
1428       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1429
1430       FastEmitBranch(FBB, DbgLoc);
1431       return true;
1432     }
1433   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1434     MVT SrcVT;
1435     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1436         (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
1437       unsigned CondReg = getRegForValue(TI->getOperand(0));
1438       if (!CondReg)
1439         return false;
1440       bool CondIsKill = hasTrivialKill(TI->getOperand(0));
1441
1442       // Issue an extract_subreg to get the lower 32-bits.
1443       if (SrcVT == MVT::i64) {
1444         CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
1445                                              AArch64::sub_32);
1446         CondIsKill = true;
1447       }
1448
1449       unsigned ANDReg = emitAND_ri(MVT::i32, CondReg, CondIsKill, 1);
1450       assert(ANDReg && "Unexpected AND instruction emission failure.");
1451       emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
1452
1453       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1454         std::swap(TBB, FBB);
1455         CC = AArch64CC::EQ;
1456       }
1457       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1458           .addImm(CC)
1459           .addMBB(TBB);
1460
1461       // Obtain the branch weight and add the TrueBB to the successor list.
1462       uint32_t BranchWeight = 0;
1463       if (FuncInfo.BPI)
1464         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1465                                                   TBB->getBasicBlock());
1466       FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1467
1468       FastEmitBranch(FBB, DbgLoc);
1469       return true;
1470     }
1471   } else if (const ConstantInt *CI =
1472                  dyn_cast<ConstantInt>(BI->getCondition())) {
1473     uint64_t Imm = CI->getZExtValue();
1474     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
1475     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
1476         .addMBB(Target);
1477
1478     // Obtain the branch weight and add the target to the successor list.
1479     uint32_t BranchWeight = 0;
1480     if (FuncInfo.BPI)
1481       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1482                                                  Target->getBasicBlock());
1483     FuncInfo.MBB->addSuccessor(Target, BranchWeight);
1484     return true;
1485   } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
1486     // Fake request the condition, otherwise the intrinsic might be completely
1487     // optimized away.
1488     unsigned CondReg = getRegForValue(BI->getCondition());
1489     if (!CondReg)
1490       return false;
1491
1492     // Emit the branch.
1493     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1494       .addImm(CC)
1495       .addMBB(TBB);
1496
1497     // Obtain the branch weight and add the TrueBB to the successor list.
1498     uint32_t BranchWeight = 0;
1499     if (FuncInfo.BPI)
1500       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1501                                                  TBB->getBasicBlock());
1502     FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1503
1504     FastEmitBranch(FBB, DbgLoc);
1505     return true;
1506   }
1507
1508   unsigned CondReg = getRegForValue(BI->getCondition());
1509   if (CondReg == 0)
1510     return false;
1511   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
1512
1513   // We've been divorced from our compare!  Our block was split, and
1514   // now our compare lives in a predecessor block.  We musn't
1515   // re-compare here, as the children of the compare aren't guaranteed
1516   // live across the block boundary (we *could* check for this).
1517   // Regardless, the compare has been done in the predecessor block,
1518   // and it left a value for us in a virtual register.  Ergo, we test
1519   // the one-bit value left in the virtual register.
1520   emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
1521
1522   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
1523     std::swap(TBB, FBB);
1524     CC = AArch64CC::EQ;
1525   }
1526
1527   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
1528       .addImm(CC)
1529       .addMBB(TBB);
1530
1531   // Obtain the branch weight and add the TrueBB to the successor list.
1532   uint32_t BranchWeight = 0;
1533   if (FuncInfo.BPI)
1534     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
1535                                                TBB->getBasicBlock());
1536   FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
1537
1538   FastEmitBranch(FBB, DbgLoc);
1539   return true;
1540 }
1541
1542 bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
1543   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
1544   unsigned AddrReg = getRegForValue(BI->getOperand(0));
1545   if (AddrReg == 0)
1546     return false;
1547
1548   // Emit the indirect branch.
1549   const MCInstrDesc &II = TII.get(AArch64::BR);
1550   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
1551   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
1552
1553   // Make sure the CFG is up-to-date.
1554   for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
1555     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
1556
1557   return true;
1558 }
1559
1560 bool AArch64FastISel::SelectCmp(const Instruction *I) {
1561   const CmpInst *CI = cast<CmpInst>(I);
1562
1563   // We may not handle every CC for now.
1564   AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
1565   if (CC == AArch64CC::AL)
1566     return false;
1567
1568   // Emit the cmp.
1569   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
1570     return false;
1571
1572   // Now set a register based on the comparison.
1573   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
1574   unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
1575   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
1576           ResultReg)
1577       .addReg(AArch64::WZR)
1578       .addReg(AArch64::WZR)
1579       .addImm(invertedCC);
1580
1581   UpdateValueMap(I, ResultReg);
1582   return true;
1583 }
1584
1585 bool AArch64FastISel::SelectSelect(const Instruction *I) {
1586   const SelectInst *SI = cast<SelectInst>(I);
1587
1588   EVT DestEVT = TLI.getValueType(SI->getType(), true);
1589   if (!DestEVT.isSimple())
1590     return false;
1591
1592   MVT DestVT = DestEVT.getSimpleVT();
1593   if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
1594       DestVT != MVT::f64)
1595     return false;
1596
1597   unsigned SelectOpc;
1598   const TargetRegisterClass *RC = nullptr;
1599   switch (DestVT.SimpleTy) {
1600   default: return false;
1601   case MVT::i32:
1602     SelectOpc = AArch64::CSELWr;    RC = &AArch64::GPR32RegClass; break;
1603   case MVT::i64:
1604     SelectOpc = AArch64::CSELXr;    RC = &AArch64::GPR64RegClass; break;
1605   case MVT::f32:
1606     SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break;
1607   case MVT::f64:
1608     SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break;
1609   }
1610
1611   const Value *Cond = SI->getCondition();
1612   bool NeedTest = true;
1613   AArch64CC::CondCode CC = AArch64CC::NE;
1614   if (foldXALUIntrinsic(CC, I, Cond))
1615     NeedTest = false;
1616
1617   unsigned CondReg = getRegForValue(Cond);
1618   if (!CondReg)
1619     return false;
1620   bool CondIsKill = hasTrivialKill(Cond);
1621
1622   if (NeedTest) {
1623     unsigned ANDReg = emitAND_ri(MVT::i32, CondReg, CondIsKill, 1);
1624     assert(ANDReg && "Unexpected AND instruction emission failure.");
1625     emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
1626   }
1627
1628   unsigned TrueReg = getRegForValue(SI->getTrueValue());
1629   bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
1630
1631   unsigned FalseReg = getRegForValue(SI->getFalseValue());
1632   bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
1633
1634   if (!TrueReg || !FalseReg)
1635     return false;
1636
1637   unsigned ResultReg = FastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill,
1638                                         FalseReg, FalseIsKill, CC);
1639   UpdateValueMap(I, ResultReg);
1640   return true;
1641 }
1642
1643 bool AArch64FastISel::SelectFPExt(const Instruction *I) {
1644   Value *V = I->getOperand(0);
1645   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
1646     return false;
1647
1648   unsigned Op = getRegForValue(V);
1649   if (Op == 0)
1650     return false;
1651
1652   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
1653   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
1654           ResultReg).addReg(Op);
1655   UpdateValueMap(I, ResultReg);
1656   return true;
1657 }
1658
1659 bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
1660   Value *V = I->getOperand(0);
1661   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
1662     return false;
1663
1664   unsigned Op = getRegForValue(V);
1665   if (Op == 0)
1666     return false;
1667
1668   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
1669   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
1670           ResultReg).addReg(Op);
1671   UpdateValueMap(I, ResultReg);
1672   return true;
1673 }
1674
1675 // FPToUI and FPToSI
1676 bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
1677   MVT DestVT;
1678   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1679     return false;
1680
1681   unsigned SrcReg = getRegForValue(I->getOperand(0));
1682   if (SrcReg == 0)
1683     return false;
1684
1685   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1686   if (SrcVT == MVT::f128)
1687     return false;
1688
1689   unsigned Opc;
1690   if (SrcVT == MVT::f64) {
1691     if (Signed)
1692       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
1693     else
1694       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
1695   } else {
1696     if (Signed)
1697       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
1698     else
1699       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
1700   }
1701   unsigned ResultReg = createResultReg(
1702       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
1703   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
1704       .addReg(SrcReg);
1705   UpdateValueMap(I, ResultReg);
1706   return true;
1707 }
1708
1709 bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
1710   MVT DestVT;
1711   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
1712     return false;
1713   assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
1714           "Unexpected value type.");
1715
1716   unsigned SrcReg = getRegForValue(I->getOperand(0));
1717   if (!SrcReg)
1718     return false;
1719   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
1720
1721   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
1722
1723   // Handle sign-extension.
1724   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
1725     SrcReg =
1726         EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
1727     if (!SrcReg)
1728       return false;
1729     SrcIsKill = true;
1730   }
1731
1732   unsigned Opc;
1733   if (SrcVT == MVT::i64) {
1734     if (Signed)
1735       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
1736     else
1737       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
1738   } else {
1739     if (Signed)
1740       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
1741     else
1742       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
1743   }
1744
1745   unsigned ResultReg = FastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
1746                                       SrcIsKill);
1747   UpdateValueMap(I, ResultReg);
1748   return true;
1749 }
1750
1751 bool AArch64FastISel::FastLowerArguments() {
1752   if (!FuncInfo.CanLowerReturn)
1753     return false;
1754
1755   const Function *F = FuncInfo.Fn;
1756   if (F->isVarArg())
1757     return false;
1758
1759   CallingConv::ID CC = F->getCallingConv();
1760   if (CC != CallingConv::C)
1761     return false;
1762
1763   // Only handle simple cases like i1/i8/i16/i32/i64/f32/f64 of up to 8 GPR and
1764   // FPR each.
1765   unsigned GPRCnt = 0;
1766   unsigned FPRCnt = 0;
1767   unsigned Idx = 0;
1768   for (auto const &Arg : F->args()) {
1769     // The first argument is at index 1.
1770     ++Idx;
1771     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
1772         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
1773         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
1774         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
1775       return false;
1776
1777     Type *ArgTy = Arg.getType();
1778     if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
1779       return false;
1780
1781     EVT ArgVT = TLI.getValueType(ArgTy);
1782     if (!ArgVT.isSimple()) return false;
1783     switch (ArgVT.getSimpleVT().SimpleTy) {
1784     default: return false;
1785     case MVT::i1:
1786     case MVT::i8:
1787     case MVT::i16:
1788     case MVT::i32:
1789     case MVT::i64:
1790       ++GPRCnt;
1791       break;
1792     case MVT::f16:
1793     case MVT::f32:
1794     case MVT::f64:
1795       ++FPRCnt;
1796       break;
1797     }
1798
1799     if (GPRCnt > 8 || FPRCnt > 8)
1800       return false;
1801   }
1802
1803   static const MCPhysReg Registers[5][8] = {
1804     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
1805       AArch64::W5, AArch64::W6, AArch64::W7 },
1806     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
1807       AArch64::X5, AArch64::X6, AArch64::X7 },
1808     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
1809       AArch64::H5, AArch64::H6, AArch64::H7 },
1810     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
1811       AArch64::S5, AArch64::S6, AArch64::S7 },
1812     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
1813       AArch64::D5, AArch64::D6, AArch64::D7 }
1814   };
1815
1816   unsigned GPRIdx = 0;
1817   unsigned FPRIdx = 0;
1818   for (auto const &Arg : F->args()) {
1819     MVT VT = TLI.getSimpleValueType(Arg.getType());
1820     unsigned SrcReg;
1821     const TargetRegisterClass *RC = nullptr;
1822     switch (VT.SimpleTy) {
1823     default: llvm_unreachable("Unexpected value type.");
1824     case MVT::i1:
1825     case MVT::i8:
1826     case MVT::i16: VT = MVT::i32; // fall-through
1827     case MVT::i32:
1828       SrcReg = Registers[0][GPRIdx++]; RC = &AArch64::GPR32RegClass; break;
1829     case MVT::i64:
1830       SrcReg = Registers[1][GPRIdx++]; RC = &AArch64::GPR64RegClass; break;
1831     case MVT::f16:
1832       SrcReg = Registers[2][FPRIdx++]; RC = &AArch64::FPR16RegClass; break;
1833     case MVT::f32:
1834       SrcReg = Registers[3][FPRIdx++]; RC = &AArch64::FPR32RegClass; break;
1835     case MVT::f64:
1836       SrcReg = Registers[4][FPRIdx++]; RC = &AArch64::FPR64RegClass; break;
1837     }
1838
1839     // Skip unused arguments.
1840     if (Arg.use_empty()) {
1841       UpdateValueMap(&Arg, 0);
1842       continue;
1843     }
1844
1845     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
1846     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
1847     // Without this, EmitLiveInCopies may eliminate the livein if its only
1848     // use is a bitcast (which isn't turned into an instruction).
1849     unsigned ResultReg = createResultReg(RC);
1850     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1851             TII.get(TargetOpcode::COPY), ResultReg)
1852         .addReg(DstReg, getKillRegState(true));
1853     UpdateValueMap(&Arg, ResultReg);
1854   }
1855   return true;
1856 }
1857
1858 bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
1859                                       SmallVectorImpl<MVT> &OutVTs,
1860                                       unsigned &NumBytes) {
1861   CallingConv::ID CC = CLI.CallConv;
1862   SmallVector<CCValAssign, 16> ArgLocs;
1863   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
1864   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
1865
1866   // Get a count of how many bytes are to be pushed on the stack.
1867   NumBytes = CCInfo.getNextStackOffset();
1868
1869   // Issue CALLSEQ_START
1870   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1871   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
1872     .addImm(NumBytes);
1873
1874   // Process the args.
1875   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1876     CCValAssign &VA = ArgLocs[i];
1877     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
1878     MVT ArgVT = OutVTs[VA.getValNo()];
1879
1880     unsigned ArgReg = getRegForValue(ArgVal);
1881     if (!ArgReg)
1882       return false;
1883
1884     // Handle arg promotion: SExt, ZExt, AExt.
1885     switch (VA.getLocInfo()) {
1886     case CCValAssign::Full:
1887       break;
1888     case CCValAssign::SExt: {
1889       MVT DestVT = VA.getLocVT();
1890       MVT SrcVT = ArgVT;
1891       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
1892       if (!ArgReg)
1893         return false;
1894       break;
1895     }
1896     case CCValAssign::AExt:
1897     // Intentional fall-through.
1898     case CCValAssign::ZExt: {
1899       MVT DestVT = VA.getLocVT();
1900       MVT SrcVT = ArgVT;
1901       ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
1902       if (!ArgReg)
1903         return false;
1904       break;
1905     }
1906     default:
1907       llvm_unreachable("Unknown arg promotion!");
1908     }
1909
1910     // Now copy/store arg to correct locations.
1911     if (VA.isRegLoc() && !VA.needsCustom()) {
1912       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1913               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
1914       CLI.OutRegs.push_back(VA.getLocReg());
1915     } else if (VA.needsCustom()) {
1916       // FIXME: Handle custom args.
1917       return false;
1918     } else {
1919       assert(VA.isMemLoc() && "Assuming store on stack.");
1920
1921       // Don't emit stores for undef values.
1922       if (isa<UndefValue>(ArgVal))
1923         continue;
1924
1925       // Need to store on the stack.
1926       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
1927
1928       unsigned BEAlign = 0;
1929       if (ArgSize < 8 && !Subtarget->isLittleEndian())
1930         BEAlign = 8 - ArgSize;
1931
1932       Address Addr;
1933       Addr.setKind(Address::RegBase);
1934       Addr.setReg(AArch64::SP);
1935       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
1936
1937       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
1938       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
1939         MachinePointerInfo::getStack(Addr.getOffset()),
1940         MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
1941
1942       if (!EmitStore(ArgVT, ArgReg, Addr, MMO))
1943         return false;
1944     }
1945   }
1946   return true;
1947 }
1948
1949 bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, MVT RetVT,
1950                                  unsigned NumBytes) {
1951   CallingConv::ID CC = CLI.CallConv;
1952
1953   // Issue CALLSEQ_END
1954   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
1955   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
1956     .addImm(NumBytes).addImm(0);
1957
1958   // Now the return value.
1959   if (RetVT != MVT::isVoid) {
1960     SmallVector<CCValAssign, 16> RVLocs;
1961     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1962     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
1963
1964     // Only handle a single return value.
1965     if (RVLocs.size() != 1)
1966       return false;
1967
1968     // Copy all of the result registers out of their specified physreg.
1969     MVT CopyVT = RVLocs[0].getValVT();
1970     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
1971     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1972             TII.get(TargetOpcode::COPY), ResultReg)
1973         .addReg(RVLocs[0].getLocReg());
1974     CLI.InRegs.push_back(RVLocs[0].getLocReg());
1975
1976     CLI.ResultReg = ResultReg;
1977     CLI.NumResultRegs = 1;
1978   }
1979
1980   return true;
1981 }
1982
1983 bool AArch64FastISel::FastLowerCall(CallLoweringInfo &CLI) {
1984   CallingConv::ID CC  = CLI.CallConv;
1985   bool IsTailCall     = CLI.IsTailCall;
1986   bool IsVarArg       = CLI.IsVarArg;
1987   const Value *Callee = CLI.Callee;
1988   const char *SymName = CLI.SymName;
1989
1990   // Allow SelectionDAG isel to handle tail calls.
1991   if (IsTailCall)
1992     return false;
1993
1994   CodeModel::Model CM = TM.getCodeModel();
1995   // Only support the small and large code model.
1996   if (CM != CodeModel::Small && CM != CodeModel::Large)
1997     return false;
1998
1999   // FIXME: Add large code model support for ELF.
2000   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
2001     return false;
2002
2003   // Let SDISel handle vararg functions.
2004   if (IsVarArg)
2005     return false;
2006
2007   // FIXME: Only handle *simple* calls for now.
2008   MVT RetVT;
2009   if (CLI.RetTy->isVoidTy())
2010     RetVT = MVT::isVoid;
2011   else if (!isTypeLegal(CLI.RetTy, RetVT))
2012     return false;
2013
2014   for (auto Flag : CLI.OutFlags)
2015     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
2016       return false;
2017
2018   // Set up the argument vectors.
2019   SmallVector<MVT, 16> OutVTs;
2020   OutVTs.reserve(CLI.OutVals.size());
2021
2022   for (auto *Val : CLI.OutVals) {
2023     MVT VT;
2024     if (!isTypeLegal(Val->getType(), VT) &&
2025         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
2026       return false;
2027
2028     // We don't handle vector parameters yet.
2029     if (VT.isVector() || VT.getSizeInBits() > 64)
2030       return false;
2031
2032     OutVTs.push_back(VT);
2033   }
2034
2035   Address Addr;
2036   if (!ComputeCallAddress(Callee, Addr))
2037     return false;
2038
2039   // Handle the arguments now that we've gotten them.
2040   unsigned NumBytes;
2041   if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
2042     return false;
2043
2044   // Issue the call.
2045   MachineInstrBuilder MIB;
2046   if (CM == CodeModel::Small) {
2047     unsigned CallOpc = Addr.getReg() ? AArch64::BLR : AArch64::BL;
2048     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
2049     if (SymName)
2050       MIB.addExternalSymbol(SymName, 0);
2051     else if (Addr.getGlobalValue())
2052       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
2053     else if (Addr.getReg())
2054       MIB.addReg(Addr.getReg());
2055     else
2056       return false;
2057   } else {
2058     unsigned CallReg = 0;
2059     if (SymName) {
2060       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
2061       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
2062               ADRPReg)
2063         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
2064
2065       CallReg = createResultReg(&AArch64::GPR64RegClass);
2066       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
2067               CallReg)
2068         .addReg(ADRPReg)
2069         .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
2070                            AArch64II::MO_NC);
2071     } else if (Addr.getGlobalValue()) {
2072       CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
2073     } else if (Addr.getReg())
2074       CallReg = Addr.getReg();
2075
2076     if (!CallReg)
2077       return false;
2078
2079     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2080                   TII.get(AArch64::BLR)).addReg(CallReg);
2081   }
2082
2083   // Add implicit physical register uses to the call.
2084   for (auto Reg : CLI.OutRegs)
2085     MIB.addReg(Reg, RegState::Implicit);
2086
2087   // Add a register mask with the call-preserved registers.
2088   // Proper defs for return values will be added by setPhysRegsDeadExcept().
2089   MIB.addRegMask(TRI.getCallPreservedMask(CC));
2090
2091   CLI.Call = MIB;
2092
2093   // Finish off the call including any return values.
2094   return FinishCall(CLI, RetVT, NumBytes);
2095 }
2096
2097 bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
2098   if (Alignment)
2099     return Len / Alignment <= 4;
2100   else
2101     return Len < 32;
2102 }
2103
2104 bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
2105                                          uint64_t Len, unsigned Alignment) {
2106   // Make sure we don't bloat code by inlining very large memcpy's.
2107   if (!IsMemCpySmall(Len, Alignment))
2108     return false;
2109
2110   int64_t UnscaledOffset = 0;
2111   Address OrigDest = Dest;
2112   Address OrigSrc = Src;
2113
2114   while (Len) {
2115     MVT VT;
2116     if (!Alignment || Alignment >= 8) {
2117       if (Len >= 8)
2118         VT = MVT::i64;
2119       else if (Len >= 4)
2120         VT = MVT::i32;
2121       else if (Len >= 2)
2122         VT = MVT::i16;
2123       else {
2124         VT = MVT::i8;
2125       }
2126     } else {
2127       // Bound based on alignment.
2128       if (Len >= 4 && Alignment == 4)
2129         VT = MVT::i32;
2130       else if (Len >= 2 && Alignment == 2)
2131         VT = MVT::i16;
2132       else {
2133         VT = MVT::i8;
2134       }
2135     }
2136
2137     bool RV;
2138     unsigned ResultReg;
2139     RV = EmitLoad(VT, ResultReg, Src);
2140     if (!RV)
2141       return false;
2142
2143     RV = EmitStore(VT, ResultReg, Dest);
2144     if (!RV)
2145       return false;
2146
2147     int64_t Size = VT.getSizeInBits() / 8;
2148     Len -= Size;
2149     UnscaledOffset += Size;
2150
2151     // We need to recompute the unscaled offset for each iteration.
2152     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
2153     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
2154   }
2155
2156   return true;
2157 }
2158
2159 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
2160 /// into the user. The condition code will only be updated on success.
2161 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
2162                                         const Instruction *I,
2163                                         const Value *Cond) {
2164   if (!isa<ExtractValueInst>(Cond))
2165     return false;
2166
2167   const auto *EV = cast<ExtractValueInst>(Cond);
2168   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
2169     return false;
2170
2171   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
2172   MVT RetVT;
2173   const Function *Callee = II->getCalledFunction();
2174   Type *RetTy =
2175   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
2176   if (!isTypeLegal(RetTy, RetVT))
2177     return false;
2178
2179   if (RetVT != MVT::i32 && RetVT != MVT::i64)
2180     return false;
2181
2182   AArch64CC::CondCode TmpCC;
2183   switch (II->getIntrinsicID()) {
2184     default: return false;
2185     case Intrinsic::sadd_with_overflow:
2186     case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
2187     case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
2188     case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
2189     case Intrinsic::smul_with_overflow:
2190     case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
2191   }
2192
2193   // Check if both instructions are in the same basic block.
2194   if (II->getParent() != I->getParent())
2195     return false;
2196
2197   // Make sure nothing is in the way
2198   BasicBlock::const_iterator Start = I;
2199   BasicBlock::const_iterator End = II;
2200   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
2201     // We only expect extractvalue instructions between the intrinsic and the
2202     // instruction to be selected.
2203     if (!isa<ExtractValueInst>(Itr))
2204       return false;
2205
2206     // Check that the extractvalue operand comes from the intrinsic.
2207     const auto *EVI = cast<ExtractValueInst>(Itr);
2208     if (EVI->getAggregateOperand() != II)
2209       return false;
2210   }
2211
2212   CC = TmpCC;
2213   return true;
2214 }
2215
2216 bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
2217   // FIXME: Handle more intrinsics.
2218   switch (II->getIntrinsicID()) {
2219   default: return false;
2220   case Intrinsic::frameaddress: {
2221     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
2222     MFI->setFrameAddressIsTaken(true);
2223
2224     const AArch64RegisterInfo *RegInfo =
2225         static_cast<const AArch64RegisterInfo *>(
2226             TM.getSubtargetImpl()->getRegisterInfo());
2227     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
2228     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2229     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2230             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
2231     // Recursively load frame address
2232     // ldr x0, [fp]
2233     // ldr x0, [x0]
2234     // ldr x0, [x0]
2235     // ...
2236     unsigned DestReg;
2237     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
2238     while (Depth--) {
2239       DestReg = FastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
2240                                 SrcReg, /*IsKill=*/true, 0);
2241       assert(DestReg && "Unexpected LDR instruction emission failure.");
2242       SrcReg = DestReg;
2243     }
2244
2245     UpdateValueMap(II, SrcReg);
2246     return true;
2247   }
2248   case Intrinsic::memcpy:
2249   case Intrinsic::memmove: {
2250     const auto *MTI = cast<MemTransferInst>(II);
2251     // Don't handle volatile.
2252     if (MTI->isVolatile())
2253       return false;
2254
2255     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
2256     // we would emit dead code because we don't currently handle memmoves.
2257     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
2258     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
2259       // Small memcpy's are common enough that we want to do them without a call
2260       // if possible.
2261       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
2262       unsigned Alignment = MTI->getAlignment();
2263       if (IsMemCpySmall(Len, Alignment)) {
2264         Address Dest, Src;
2265         if (!ComputeAddress(MTI->getRawDest(), Dest) ||
2266             !ComputeAddress(MTI->getRawSource(), Src))
2267           return false;
2268         if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
2269           return true;
2270       }
2271     }
2272
2273     if (!MTI->getLength()->getType()->isIntegerTy(64))
2274       return false;
2275
2276     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
2277       // Fast instruction selection doesn't support the special
2278       // address spaces.
2279       return false;
2280
2281     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
2282     return LowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
2283   }
2284   case Intrinsic::memset: {
2285     const MemSetInst *MSI = cast<MemSetInst>(II);
2286     // Don't handle volatile.
2287     if (MSI->isVolatile())
2288       return false;
2289
2290     if (!MSI->getLength()->getType()->isIntegerTy(64))
2291       return false;
2292
2293     if (MSI->getDestAddressSpace() > 255)
2294       // Fast instruction selection doesn't support the special
2295       // address spaces.
2296       return false;
2297
2298     return LowerCallTo(II, "memset", II->getNumArgOperands() - 2);
2299   }
2300   case Intrinsic::trap: {
2301     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
2302         .addImm(1);
2303     return true;
2304   }
2305   case Intrinsic::sqrt: {
2306     Type *RetTy = II->getCalledFunction()->getReturnType();
2307
2308     MVT VT;
2309     if (!isTypeLegal(RetTy, VT))
2310       return false;
2311
2312     unsigned Op0Reg = getRegForValue(II->getOperand(0));
2313     if (!Op0Reg)
2314       return false;
2315     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
2316
2317     unsigned ResultReg = FastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
2318     if (!ResultReg)
2319       return false;
2320
2321     UpdateValueMap(II, ResultReg);
2322     return true;
2323   }
2324   case Intrinsic::sadd_with_overflow:
2325   case Intrinsic::uadd_with_overflow:
2326   case Intrinsic::ssub_with_overflow:
2327   case Intrinsic::usub_with_overflow:
2328   case Intrinsic::smul_with_overflow:
2329   case Intrinsic::umul_with_overflow: {
2330     // This implements the basic lowering of the xalu with overflow intrinsics.
2331     const Function *Callee = II->getCalledFunction();
2332     auto *Ty = cast<StructType>(Callee->getReturnType());
2333     Type *RetTy = Ty->getTypeAtIndex(0U);
2334
2335     MVT VT;
2336     if (!isTypeLegal(RetTy, VT))
2337       return false;
2338
2339     if (VT != MVT::i32 && VT != MVT::i64)
2340       return false;
2341
2342     const Value *LHS = II->getArgOperand(0);
2343     const Value *RHS = II->getArgOperand(1);
2344     // Canonicalize immediate to the RHS.
2345     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
2346         isCommutativeIntrinsic(II))
2347       std::swap(LHS, RHS);
2348
2349     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
2350     AArch64CC::CondCode CC = AArch64CC::Invalid;
2351     switch (II->getIntrinsicID()) {
2352     default: llvm_unreachable("Unexpected intrinsic!");
2353     case Intrinsic::sadd_with_overflow:
2354       ResultReg1 = emitAdds(VT, LHS, RHS); CC = AArch64CC::VS; break;
2355     case Intrinsic::uadd_with_overflow:
2356       ResultReg1 = emitAdds(VT, LHS, RHS); CC = AArch64CC::HS; break;
2357     case Intrinsic::ssub_with_overflow:
2358       ResultReg1 = emitSubs(VT, LHS, RHS); CC = AArch64CC::VS; break;
2359     case Intrinsic::usub_with_overflow:
2360       ResultReg1 = emitSubs(VT, LHS, RHS); CC = AArch64CC::LO; break;
2361     case Intrinsic::smul_with_overflow: {
2362       CC = AArch64CC::NE;
2363       unsigned LHSReg = getRegForValue(LHS);
2364       if (!LHSReg)
2365         return false;
2366       bool LHSIsKill = hasTrivialKill(LHS);
2367
2368       unsigned RHSReg = getRegForValue(RHS);
2369       if (!RHSReg)
2370         return false;
2371       bool RHSIsKill = hasTrivialKill(RHS);
2372
2373       if (VT == MVT::i32) {
2374         MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2375         unsigned ShiftReg = emitLSR_ri(MVT::i64, MulReg, /*IsKill=*/false, 32);
2376         MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
2377                                             AArch64::sub_32);
2378         ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
2379                                               AArch64::sub_32);
2380         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
2381                     AArch64_AM::ASR, 31, /*WantResult=*/false);
2382       } else {
2383         assert(VT == MVT::i64 && "Unexpected value type.");
2384         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2385         unsigned SMULHReg = FastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
2386                                         RHSReg, RHSIsKill);
2387         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
2388                     AArch64_AM::ASR, 63, /*WantResult=*/false);
2389       }
2390       break;
2391     }
2392     case Intrinsic::umul_with_overflow: {
2393       CC = AArch64CC::NE;
2394       unsigned LHSReg = getRegForValue(LHS);
2395       if (!LHSReg)
2396         return false;
2397       bool LHSIsKill = hasTrivialKill(LHS);
2398
2399       unsigned RHSReg = getRegForValue(RHS);
2400       if (!RHSReg)
2401         return false;
2402       bool RHSIsKill = hasTrivialKill(RHS);
2403
2404       if (VT == MVT::i32) {
2405         MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2406         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
2407                     /*IsKill=*/false, AArch64_AM::LSR, 32,
2408                     /*WantResult=*/false);
2409         MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
2410                                             AArch64::sub_32);
2411       } else {
2412         assert(VT == MVT::i64 && "Unexpected value type.");
2413         MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
2414         unsigned UMULHReg = FastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
2415                                         RHSReg, RHSIsKill);
2416         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
2417                     /*IsKill=*/false, /*WantResult=*/false);
2418       }
2419       break;
2420     }
2421     }
2422
2423     if (MulReg) {
2424       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
2425       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2426               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
2427     }
2428
2429     ResultReg2 = FastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
2430                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
2431                                   /*IsKill=*/true, getInvertedCondCode(CC));
2432     assert((ResultReg1 + 1) == ResultReg2 &&
2433            "Nonconsecutive result registers.");
2434     UpdateValueMap(II, ResultReg1, 2);
2435     return true;
2436   }
2437   }
2438   return false;
2439 }
2440
2441 bool AArch64FastISel::SelectRet(const Instruction *I) {
2442   const ReturnInst *Ret = cast<ReturnInst>(I);
2443   const Function &F = *I->getParent()->getParent();
2444
2445   if (!FuncInfo.CanLowerReturn)
2446     return false;
2447
2448   if (F.isVarArg())
2449     return false;
2450
2451   // Build a list of return value registers.
2452   SmallVector<unsigned, 4> RetRegs;
2453
2454   if (Ret->getNumOperands() > 0) {
2455     CallingConv::ID CC = F.getCallingConv();
2456     SmallVector<ISD::OutputArg, 4> Outs;
2457     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
2458
2459     // Analyze operands of the call, assigning locations to each operand.
2460     SmallVector<CCValAssign, 16> ValLocs;
2461     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
2462     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2463                                                      : RetCC_AArch64_AAPCS;
2464     CCInfo.AnalyzeReturn(Outs, RetCC);
2465
2466     // Only handle a single return value for now.
2467     if (ValLocs.size() != 1)
2468       return false;
2469
2470     CCValAssign &VA = ValLocs[0];
2471     const Value *RV = Ret->getOperand(0);
2472
2473     // Don't bother handling odd stuff for now.
2474     if (VA.getLocInfo() != CCValAssign::Full)
2475       return false;
2476     // Only handle register returns for now.
2477     if (!VA.isRegLoc())
2478       return false;
2479     unsigned Reg = getRegForValue(RV);
2480     if (Reg == 0)
2481       return false;
2482
2483     unsigned SrcReg = Reg + VA.getValNo();
2484     unsigned DestReg = VA.getLocReg();
2485     // Avoid a cross-class copy. This is very unlikely.
2486     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
2487       return false;
2488
2489     EVT RVEVT = TLI.getValueType(RV->getType());
2490     if (!RVEVT.isSimple())
2491       return false;
2492
2493     // Vectors (of > 1 lane) in big endian need tricky handling.
2494     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
2495       return false;
2496
2497     MVT RVVT = RVEVT.getSimpleVT();
2498     if (RVVT == MVT::f128)
2499       return false;
2500     MVT DestVT = VA.getValVT();
2501     // Special handling for extended integers.
2502     if (RVVT != DestVT) {
2503       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2504         return false;
2505
2506       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
2507         return false;
2508
2509       bool isZExt = Outs[0].Flags.isZExt();
2510       SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
2511       if (SrcReg == 0)
2512         return false;
2513     }
2514
2515     // Make the copy.
2516     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2517             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
2518
2519     // Add register to return instruction.
2520     RetRegs.push_back(VA.getLocReg());
2521   }
2522
2523   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2524                                     TII.get(AArch64::RET_ReallyLR));
2525   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
2526     MIB.addReg(RetRegs[i], RegState::Implicit);
2527   return true;
2528 }
2529
2530 bool AArch64FastISel::SelectTrunc(const Instruction *I) {
2531   Type *DestTy = I->getType();
2532   Value *Op = I->getOperand(0);
2533   Type *SrcTy = Op->getType();
2534
2535   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2536   EVT DestEVT = TLI.getValueType(DestTy, true);
2537   if (!SrcEVT.isSimple())
2538     return false;
2539   if (!DestEVT.isSimple())
2540     return false;
2541
2542   MVT SrcVT = SrcEVT.getSimpleVT();
2543   MVT DestVT = DestEVT.getSimpleVT();
2544
2545   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2546       SrcVT != MVT::i8)
2547     return false;
2548   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
2549       DestVT != MVT::i1)
2550     return false;
2551
2552   unsigned SrcReg = getRegForValue(Op);
2553   if (!SrcReg)
2554     return false;
2555   bool SrcIsKill = hasTrivialKill(Op);
2556
2557   // If we're truncating from i64 to a smaller non-legal type then generate an
2558   // AND.  Otherwise, we know the high bits are undefined and a truncate doesn't
2559   // generate any code.
2560   if (SrcVT == MVT::i64) {
2561     uint64_t Mask = 0;
2562     switch (DestVT.SimpleTy) {
2563     default:
2564       // Trunc i64 to i32 is handled by the target-independent fast-isel.
2565       return false;
2566     case MVT::i1:
2567       Mask = 0x1;
2568       break;
2569     case MVT::i8:
2570       Mask = 0xff;
2571       break;
2572     case MVT::i16:
2573       Mask = 0xffff;
2574       break;
2575     }
2576     // Issue an extract_subreg to get the lower 32-bits.
2577     unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2578                                                 AArch64::sub_32);
2579     // Create the AND instruction which performs the actual truncation.
2580     unsigned ANDReg = emitAND_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
2581     assert(ANDReg && "Unexpected AND instruction emission failure.");
2582     SrcReg = ANDReg;
2583   }
2584
2585   UpdateValueMap(I, SrcReg);
2586   return true;
2587 }
2588
2589 unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
2590   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
2591           DestVT == MVT::i64) &&
2592          "Unexpected value type.");
2593   // Handle i8 and i16 as i32.
2594   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2595     DestVT = MVT::i32;
2596
2597   if (isZExt) {
2598     unsigned ResultReg = emitAND_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2599     assert(ResultReg && "Unexpected AND instruction emission failure.");
2600     if (DestVT == MVT::i64) {
2601       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
2602       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
2603       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2604       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2605               TII.get(AArch64::SUBREG_TO_REG), Reg64)
2606           .addImm(0)
2607           .addReg(ResultReg)
2608           .addImm(AArch64::sub_32);
2609       ResultReg = Reg64;
2610     }
2611     return ResultReg;
2612   } else {
2613     if (DestVT == MVT::i64) {
2614       // FIXME: We're SExt i1 to i64.
2615       return 0;
2616     }
2617     return FastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
2618                             /*TODO:IsKill=*/false, 0, 0);
2619   }
2620 }
2621
2622 unsigned AArch64FastISel::Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2623                                       unsigned Op1, bool Op1IsKill) {
2624   unsigned Opc, ZReg;
2625   switch (RetVT.SimpleTy) {
2626   default: return 0;
2627   case MVT::i8:
2628   case MVT::i16:
2629   case MVT::i32:
2630     RetVT = MVT::i32;
2631     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
2632   case MVT::i64:
2633     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
2634   }
2635
2636   const TargetRegisterClass *RC =
2637       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2638   return FastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
2639                           /*IsKill=*/ZReg, true);
2640 }
2641
2642 unsigned AArch64FastISel::Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2643                                         unsigned Op1, bool Op1IsKill) {
2644   if (RetVT != MVT::i64)
2645     return 0;
2646
2647   return FastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
2648                           Op0, Op0IsKill, Op1, Op1IsKill,
2649                           AArch64::XZR, /*IsKill=*/true);
2650 }
2651
2652 unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
2653                                         unsigned Op1, bool Op1IsKill) {
2654   if (RetVT != MVT::i64)
2655     return 0;
2656
2657   return FastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
2658                           Op0, Op0IsKill, Op1, Op1IsKill,
2659                           AArch64::XZR, /*IsKill=*/true);
2660 }
2661
2662 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
2663                                      unsigned Op1Reg, bool Op1IsKill) {
2664   unsigned Opc = 0;
2665   bool NeedTrunc = false;
2666   uint64_t Mask = 0;
2667   switch (RetVT.SimpleTy) {
2668   default: return 0;
2669   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
2670   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
2671   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
2672   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
2673   }
2674
2675   const TargetRegisterClass *RC =
2676       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2677   if (NeedTrunc) {
2678     Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
2679     Op1IsKill = true;
2680   }
2681   unsigned ResultReg = FastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
2682                                        Op1IsKill);
2683   if (NeedTrunc)
2684     ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
2685   return ResultReg;
2686 }
2687
2688 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2689                                      uint64_t Shift) {
2690   unsigned Opc, ImmR, ImmS;
2691   switch (RetVT.SimpleTy) {
2692   default: return 0;
2693   case MVT::i8:
2694     Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS =  7 - Shift; break;
2695   case MVT::i16:
2696     Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 15 - Shift; break;
2697   case MVT::i32:
2698     Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 31 - Shift; break;
2699   case MVT::i64:
2700     Opc = AArch64::UBFMXri; ImmR = -Shift % 64; ImmS = 63 - Shift; break;
2701   }
2702
2703   const TargetRegisterClass *RC =
2704       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2705   return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
2706 }
2707
2708 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
2709                                      unsigned Op1Reg, bool Op1IsKill) {
2710   unsigned Opc = 0;
2711   bool NeedTrunc = false;
2712   uint64_t Mask = 0;
2713   switch (RetVT.SimpleTy) {
2714   default: return 0;
2715   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
2716   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
2717   case MVT::i32: Opc = AArch64::LSRVWr; break;
2718   case MVT::i64: Opc = AArch64::LSRVXr; break;
2719   }
2720
2721   const TargetRegisterClass *RC =
2722       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2723   if (NeedTrunc) {
2724     Op0Reg = emitAND_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
2725     Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
2726     Op0IsKill = Op1IsKill = true;
2727   }
2728   unsigned ResultReg = FastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
2729                                        Op1IsKill);
2730   if (NeedTrunc)
2731     ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
2732   return ResultReg;
2733 }
2734
2735 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2736                                      uint64_t Shift) {
2737   unsigned Opc, ImmS;
2738   switch (RetVT.SimpleTy) {
2739   default: return 0;
2740   case MVT::i8:  Opc = AArch64::UBFMWri; ImmS =  7; break;
2741   case MVT::i16: Opc = AArch64::UBFMWri; ImmS = 15; break;
2742   case MVT::i32: Opc = AArch64::UBFMWri; ImmS = 31; break;
2743   case MVT::i64: Opc = AArch64::UBFMXri; ImmS = 63; break;
2744   }
2745
2746   const TargetRegisterClass *RC =
2747       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2748   return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, Shift, ImmS);
2749 }
2750
2751 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
2752                                      unsigned Op1Reg, bool Op1IsKill) {
2753   unsigned Opc = 0;
2754   bool NeedTrunc = false;
2755   uint64_t Mask = 0;
2756   switch (RetVT.SimpleTy) {
2757   default: return 0;
2758   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
2759   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
2760   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
2761   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
2762   }
2763
2764   const TargetRegisterClass *RC =
2765       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2766   if (NeedTrunc) {
2767     Op0Reg = EmitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
2768     Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
2769     Op0IsKill = Op1IsKill = true;
2770   }
2771   unsigned ResultReg = FastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
2772                                        Op1IsKill);
2773   if (NeedTrunc)
2774     ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
2775   return ResultReg;
2776 }
2777
2778 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
2779                                      uint64_t Shift) {
2780   unsigned Opc, ImmS;
2781   switch (RetVT.SimpleTy) {
2782   default: return 0;
2783   case MVT::i8:  Opc = AArch64::SBFMWri; ImmS =  7; break;
2784   case MVT::i16: Opc = AArch64::SBFMWri; ImmS = 15; break;
2785   case MVT::i32: Opc = AArch64::SBFMWri; ImmS = 31; break;
2786   case MVT::i64: Opc = AArch64::SBFMXri; ImmS = 63; break;
2787   }
2788
2789   const TargetRegisterClass *RC =
2790       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2791   return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, Shift, ImmS);
2792 }
2793
2794 unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
2795                                      bool isZExt) {
2796   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
2797
2798   // FastISel does not have plumbing to deal with extensions where the SrcVT or
2799   // DestVT are odd things, so test to make sure that they are both types we can
2800   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
2801   // bail out to SelectionDAG.
2802   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
2803        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
2804       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
2805        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
2806     return 0;
2807
2808   unsigned Opc;
2809   unsigned Imm = 0;
2810
2811   switch (SrcVT.SimpleTy) {
2812   default:
2813     return 0;
2814   case MVT::i1:
2815     return Emiti1Ext(SrcReg, DestVT, isZExt);
2816   case MVT::i8:
2817     if (DestVT == MVT::i64)
2818       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2819     else
2820       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2821     Imm = 7;
2822     break;
2823   case MVT::i16:
2824     if (DestVT == MVT::i64)
2825       Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2826     else
2827       Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
2828     Imm = 15;
2829     break;
2830   case MVT::i32:
2831     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
2832     Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
2833     Imm = 31;
2834     break;
2835   }
2836
2837   // Handle i8 and i16 as i32.
2838   if (DestVT == MVT::i8 || DestVT == MVT::i16)
2839     DestVT = MVT::i32;
2840   else if (DestVT == MVT::i64) {
2841     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2842     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2843             TII.get(AArch64::SUBREG_TO_REG), Src64)
2844         .addImm(0)
2845         .addReg(SrcReg)
2846         .addImm(AArch64::sub_32);
2847     SrcReg = Src64;
2848   }
2849
2850   const TargetRegisterClass *RC =
2851       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2852   return FastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
2853 }
2854
2855 bool AArch64FastISel::SelectIntExt(const Instruction *I) {
2856   // On ARM, in general, integer casts don't involve legal types; this code
2857   // handles promotable integers.  The high bits for a type smaller than
2858   // the register size are assumed to be undefined.
2859   Type *DestTy = I->getType();
2860   Value *Src = I->getOperand(0);
2861   Type *SrcTy = Src->getType();
2862
2863   bool isZExt = isa<ZExtInst>(I);
2864   unsigned SrcReg = getRegForValue(Src);
2865   if (!SrcReg)
2866     return false;
2867
2868   EVT SrcEVT = TLI.getValueType(SrcTy, true);
2869   EVT DestEVT = TLI.getValueType(DestTy, true);
2870   if (!SrcEVT.isSimple())
2871     return false;
2872   if (!DestEVT.isSimple())
2873     return false;
2874
2875   MVT SrcVT = SrcEVT.getSimpleVT();
2876   MVT DestVT = DestEVT.getSimpleVT();
2877   unsigned ResultReg = 0;
2878
2879   // Check if it is an argument and if it is already zero/sign-extended.
2880   if (const auto *Arg = dyn_cast<Argument>(Src)) {
2881     if ((isZExt && Arg->hasZExtAttr()) || (!isZExt && Arg->hasSExtAttr())) {
2882       if (DestVT == MVT::i64) {
2883         ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
2884         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2885                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
2886           .addImm(0)
2887           .addReg(SrcReg)
2888           .addImm(AArch64::sub_32);
2889       } else
2890         ResultReg = SrcReg;
2891     }
2892   }
2893
2894   if (!ResultReg)
2895     ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2896
2897   if (!ResultReg)
2898     return false;
2899
2900   UpdateValueMap(I, ResultReg);
2901   return true;
2902 }
2903
2904 bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
2905   EVT DestEVT = TLI.getValueType(I->getType(), true);
2906   if (!DestEVT.isSimple())
2907     return false;
2908
2909   MVT DestVT = DestEVT.getSimpleVT();
2910   if (DestVT != MVT::i64 && DestVT != MVT::i32)
2911     return false;
2912
2913   unsigned DivOpc;
2914   bool is64bit = (DestVT == MVT::i64);
2915   switch (ISDOpcode) {
2916   default:
2917     return false;
2918   case ISD::SREM:
2919     DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
2920     break;
2921   case ISD::UREM:
2922     DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
2923     break;
2924   }
2925   unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
2926   unsigned Src0Reg = getRegForValue(I->getOperand(0));
2927   if (!Src0Reg)
2928     return false;
2929   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
2930
2931   unsigned Src1Reg = getRegForValue(I->getOperand(1));
2932   if (!Src1Reg)
2933     return false;
2934   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
2935
2936   const TargetRegisterClass *RC =
2937       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
2938   unsigned QuotReg = FastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
2939                                      Src1Reg, /*IsKill=*/false);
2940   assert(QuotReg && "Unexpected DIV instruction emission failure.");
2941   // The remainder is computed as numerator - (quotient * denominator) using the
2942   // MSUB instruction.
2943   unsigned ResultReg = FastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
2944                                         Src1Reg, Src1IsKill, Src0Reg,
2945                                         Src0IsKill);
2946   UpdateValueMap(I, ResultReg);
2947   return true;
2948 }
2949
2950 bool AArch64FastISel::SelectMul(const Instruction *I) {
2951   EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
2952   if (!SrcEVT.isSimple())
2953     return false;
2954   MVT SrcVT = SrcEVT.getSimpleVT();
2955
2956   // Must be simple value type.  Don't handle vectors.
2957   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
2958       SrcVT != MVT::i8)
2959     return false;
2960
2961   unsigned Src0Reg = getRegForValue(I->getOperand(0));
2962   if (!Src0Reg)
2963     return false;
2964   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
2965
2966   unsigned Src1Reg = getRegForValue(I->getOperand(1));
2967   if (!Src1Reg)
2968     return false;
2969   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
2970
2971   unsigned ResultReg =
2972     Emit_MUL_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
2973
2974   if (!ResultReg)
2975     return false;
2976
2977   UpdateValueMap(I, ResultReg);
2978   return true;
2979 }
2980
2981 bool AArch64FastISel::SelectShift(const Instruction *I) {
2982   EVT RetEVT = TLI.getValueType(I->getType(), true);
2983   if (!RetEVT.isSimple())
2984     return false;
2985   MVT RetVT = RetEVT.getSimpleVT();
2986
2987   unsigned Op0Reg = getRegForValue(I->getOperand(0));
2988   if (!Op0Reg)
2989     return false;
2990   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
2991
2992   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
2993     unsigned ResultReg = 0;
2994     uint64_t ShiftVal = C->getZExtValue();
2995     switch (I->getOpcode()) {
2996     default: llvm_unreachable("Unexpected instruction.");
2997     case Instruction::Shl:
2998       ResultReg = emitLSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
2999       break;
3000     case Instruction::AShr:
3001       ResultReg = emitASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
3002       break;
3003     case Instruction::LShr:
3004       ResultReg = emitLSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
3005       break;
3006     }
3007     if (!ResultReg)
3008       return false;
3009
3010     UpdateValueMap(I, ResultReg);
3011     return true;
3012   }
3013
3014   unsigned Op1Reg = getRegForValue(I->getOperand(1));
3015   if (!Op1Reg)
3016     return false;
3017   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
3018
3019   unsigned ResultReg = 0;
3020   switch (I->getOpcode()) {
3021   default: llvm_unreachable("Unexpected instruction.");
3022   case Instruction::Shl:
3023     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
3024     break;
3025   case Instruction::AShr:
3026     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
3027     break;
3028   case Instruction::LShr:
3029     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
3030     break;
3031   }
3032
3033   if (!ResultReg)
3034     return false;
3035
3036   UpdateValueMap(I, ResultReg);
3037   return true;
3038 }
3039
3040 bool AArch64FastISel::SelectBitCast(const Instruction *I) {
3041   MVT RetVT, SrcVT;
3042
3043   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
3044     return false;
3045   if (!isTypeLegal(I->getType(), RetVT))
3046     return false;
3047
3048   unsigned Opc;
3049   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
3050     Opc = AArch64::FMOVWSr;
3051   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
3052     Opc = AArch64::FMOVXDr;
3053   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
3054     Opc = AArch64::FMOVSWr;
3055   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
3056     Opc = AArch64::FMOVDXr;
3057   else
3058     return false;
3059
3060   const TargetRegisterClass *RC = nullptr;
3061   switch (RetVT.SimpleTy) {
3062   default: llvm_unreachable("Unexpected value type.");
3063   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
3064   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
3065   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
3066   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
3067   }
3068   unsigned Op0Reg = getRegForValue(I->getOperand(0));
3069   if (!Op0Reg)
3070     return false;
3071   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
3072   unsigned ResultReg = FastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
3073
3074   if (!ResultReg)
3075     return false;
3076
3077   UpdateValueMap(I, ResultReg);
3078   return true;
3079 }
3080
3081 bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
3082   switch (I->getOpcode()) {
3083   default:
3084     break;
3085   case Instruction::Load:
3086     return SelectLoad(I);
3087   case Instruction::Store:
3088     return SelectStore(I);
3089   case Instruction::Br:
3090     return SelectBranch(I);
3091   case Instruction::IndirectBr:
3092     return SelectIndirectBr(I);
3093   case Instruction::FCmp:
3094   case Instruction::ICmp:
3095     return SelectCmp(I);
3096   case Instruction::Select:
3097     return SelectSelect(I);
3098   case Instruction::FPExt:
3099     return SelectFPExt(I);
3100   case Instruction::FPTrunc:
3101     return SelectFPTrunc(I);
3102   case Instruction::FPToSI:
3103     return SelectFPToInt(I, /*Signed=*/true);
3104   case Instruction::FPToUI:
3105     return SelectFPToInt(I, /*Signed=*/false);
3106   case Instruction::SIToFP:
3107     return SelectIntToFP(I, /*Signed=*/true);
3108   case Instruction::UIToFP:
3109     return SelectIntToFP(I, /*Signed=*/false);
3110   case Instruction::SRem:
3111     return SelectRem(I, ISD::SREM);
3112   case Instruction::URem:
3113     return SelectRem(I, ISD::UREM);
3114   case Instruction::Ret:
3115     return SelectRet(I);
3116   case Instruction::Trunc:
3117     return SelectTrunc(I);
3118   case Instruction::ZExt:
3119   case Instruction::SExt:
3120     return SelectIntExt(I);
3121
3122   // FIXME: All of these should really be handled by the target-independent
3123   // selector -> improve FastISel tblgen.
3124   case Instruction::Mul:
3125     return SelectMul(I);
3126   case Instruction::Shl:  // fall-through
3127   case Instruction::LShr: // fall-through
3128   case Instruction::AShr:
3129     return SelectShift(I);
3130   case Instruction::BitCast:
3131     return SelectBitCast(I);
3132   }
3133   return false;
3134   // Silence warnings.
3135   (void)&CC_AArch64_DarwinPCS_VarArg;
3136 }
3137
3138 namespace llvm {
3139 llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
3140                                         const TargetLibraryInfo *libInfo) {
3141   return new AArch64FastISel(funcInfo, libInfo);
3142 }
3143 }