//===----------------------------------------------------------------------===//
#include "AArch64.h"
-#include "AArch64TargetMachine.h"
#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
namespace {
class AArch64FastISel : public FastISel {
-
class Address {
public:
typedef enum {
private:
BaseKind Kind;
+ AArch64_AM::ShiftExtendType ExtType;
union {
unsigned Reg;
int FI;
} Base;
+ unsigned OffsetReg;
+ unsigned Shift;
int64_t Offset;
+ const GlobalValue *GV;
public:
- Address() : Kind(RegBase), Offset(0) { Base.Reg = 0; }
+ Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
+ OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
void setKind(BaseKind K) { Kind = K; }
BaseKind getKind() const { return Kind; }
+ void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
+ AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
bool isRegBase() const { return Kind == RegBase; }
bool isFIBase() const { return Kind == FrameIndexBase; }
void setReg(unsigned Reg) {
assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
}
+ void setOffsetReg(unsigned Reg) {
+ assert(isRegBase() && "Invalid offset register access!");
+ OffsetReg = Reg;
+ }
+ unsigned getOffsetReg() const {
+ assert(isRegBase() && "Invalid offset register access!");
+ return OffsetReg;
+ }
void setFI(unsigned FI) {
assert(isFIBase() && "Invalid base frame index access!");
Base.FI = FI;
}
void setOffset(int64_t O) { Offset = O; }
int64_t getOffset() { return Offset; }
+ void setShift(unsigned S) { Shift = S; }
+ unsigned getShift() { return Shift; }
- bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
+ void setGlobalValue(const GlobalValue *G) { GV = G; }
+ const GlobalValue *getGlobalValue() { return GV; }
};
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
const AArch64Subtarget *Subtarget;
LLVMContext *Context;
+ bool FastLowerArguments() override;
+ bool FastLowerCall(CallLoweringInfo &CLI) override;
+ bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
+
private:
// Selection routines.
bool SelectLoad(const Instruction *I);
bool SelectFPToInt(const Instruction *I, bool Signed);
bool SelectIntToFP(const Instruction *I, bool Signed);
bool SelectRem(const Instruction *I, unsigned ISDOpcode);
- bool SelectCall(const Instruction *I, const char *IntrMemName);
- bool SelectIntrinsicCall(const IntrinsicInst &I);
bool SelectRet(const Instruction *I);
bool SelectTrunc(const Instruction *I);
bool SelectIntExt(const Instruction *I);
bool SelectMul(const Instruction *I);
+ bool SelectShift(const Instruction *I, bool IsLeftShift, bool IsArithmetic);
+ bool SelectBitCast(const Instruction *I);
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
- bool ComputeAddress(const Value *Obj, Address &Addr);
- bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
- bool UseUnscaled);
+ bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
+ bool ComputeCallAddress(const Value *V, Address &Addr);
+ bool SimplifyAddress(Address &Addr, MVT VT);
void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
- unsigned Flags, bool UseUnscaled);
+ unsigned Flags, unsigned ScaleFactor,
+ MachineMemOperand *MMO);
bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
unsigned Alignment);
+ bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
+ const Value *Cond);
+
// Emit functions.
bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
- bool UseUnscaled = false);
+ MachineMemOperand *MMO = nullptr);
bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
- bool UseUnscaled = false);
+ MachineMemOperand *MMO = nullptr);
unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
-
+ unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ unsigned Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
+ unsigned Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
+ unsigned Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
+
+ unsigned AArch64MaterializeInt(const ConstantInt *CI, MVT VT);
unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
unsigned AArch64MaterializeGV(const GlobalValue *GV);
// Call handling routines.
private:
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
- bool ProcessCallArgs(SmallVectorImpl<Value *> &Args,
- SmallVectorImpl<unsigned> &ArgRegs,
- SmallVectorImpl<MVT> &ArgVTs,
- SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
- SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
+ bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
unsigned &NumBytes);
- bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
- const Instruction *I, CallingConv::ID CC, unsigned &NumBytes);
+ bool FinishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
public:
// Backend specific FastISel code.
return 0;
}
+unsigned AArch64FastISel::AArch64MaterializeInt(const ConstantInt *CI, MVT VT) {
+ if (VT > MVT::i64)
+ return 0;
+
+ if (!CI->isZero())
+ return FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+
+ // Create a copy from the zero register to materialize a "0" value.
+ const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
+ : &AArch64::GPR32RegClass;
+ unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(ZeroReg, getKillRegState(true));
+ return ResultReg;
+}
+
unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
if (VT != MVT::f32 && VT != MVT::f64)
return 0;
const APFloat Val = CFP->getValueAPF();
- bool is64bit = (VT == MVT::f64);
+ bool Is64Bit = (VT == MVT::f64);
// This checks to see if we can use FMOV instructions to materialize
// a constant, otherwise we have to materialize via the constant pool.
if (TLI.isFPImmLegal(Val, VT)) {
- int Imm;
- unsigned Opc;
- if (is64bit) {
- Imm = AArch64_AM::getFP64Imm(Val);
- Opc = AArch64::FMOVDi;
- } else {
- Imm = AArch64_AM::getFP32Imm(Val);
- Opc = AArch64::FMOVSi;
- }
+ int Imm = Is64Bit ? AArch64_AM::getFP64Imm(Val)
+ : AArch64_AM::getFP32Imm(Val);
+ unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addImm(Imm);
+ .addImm(Imm);
return ResultReg;
}
if (Align == 0)
Align = DL.getTypeAllocSize(CFP->getType());
- unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
- ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE);
+ ADRPReg)
+ .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
- unsigned Opc = is64bit ? AArch64::LDRDui : AArch64::LDRSui;
+ unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(ADRPReg)
- .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ .addReg(ADRPReg)
+ .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
return ResultReg;
}
// ADRP + LDRX
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
ADRPReg)
- .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
+ .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
ResultReg = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
ResultReg)
- .addReg(ADRPReg)
- .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
- AArch64II::MO_NC);
+ .addReg(ADRPReg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
} else {
// ADRP + ADDX
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
- ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
+ ADRPReg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
- .addReg(ADRPReg)
- .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
- .addImm(0);
+ .addReg(ADRPReg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
+ .addImm(0);
}
return ResultReg;
}
return 0;
MVT VT = CEVT.getSimpleVT();
- // FIXME: Handle ConstantInt.
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ if (const auto *CI = dyn_cast<ConstantInt>(C))
+ return AArch64MaterializeInt(CI, VT);
+ else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return AArch64MaterializeFP(CFP, VT);
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return AArch64MaterializeGV(GV);
}
// Computes the address to get to an object.
-bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
+bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr,
+                                     Type *Ty) {
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
break;
case Instruction::BitCast: {
// Look through bitcasts.
- return ComputeAddress(U->getOperand(0), Addr);
+ return ComputeAddress(U->getOperand(0), Addr, Ty);
}
case Instruction::IntToPtr: {
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
- return ComputeAddress(U->getOperand(0), Addr);
+ return ComputeAddress(U->getOperand(0), Addr, Ty);
break;
}
case Instruction::PtrToInt: {
// Look past no-op ptrtoints.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
- return ComputeAddress(U->getOperand(0), Addr);
+ return ComputeAddress(U->getOperand(0), Addr, Ty);
break;
}
case Instruction::GetElementPtr: {
// Try to grab the base operand now.
Addr.setOffset(TmpOffset);
- if (ComputeAddress(U->getOperand(0), Addr))
+ if (ComputeAddress(U->getOperand(0), Addr, Ty))
return true;
// We failed, restore everything and try the other options.
}
break;
}
+ case Instruction::Add: {
+ // Adds of constants are common and easy enough.
+ const Value *LHS = U->getOperand(0);
+ const Value *RHS = U->getOperand(1);
+
+ if (isa<ConstantInt>(LHS))
+ std::swap(LHS, RHS);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
+ return ComputeAddress(LHS, Addr, Ty);
+ }
+
+ Address Backup = Addr;
+ if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
+ return true;
+ Addr = Backup;
+
+ break;
+ }
+ case Instruction::Shl:
+ if (Addr.getOffsetReg())
+ break;
+
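+    // Fold the shift into the address only if the shift amount matches the
+    // size of the memory access; a zero- or sign-extended 32-bit index can use
+    // the UXTW/SXTW extended-register form.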
+ if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ unsigned Val = CI->getZExtValue();
+ if (Val < 1 || Val > 3)
+ break;
+
+ uint64_t NumBytes = 0;
+ if (Ty && Ty->isSized()) {
+ uint64_t NumBits = DL.getTypeSizeInBits(Ty);
+ NumBytes = NumBits / 8;
+ if (!isPowerOf2_64(NumBits))
+ NumBytes = 0;
+ }
+
+ if (NumBytes != (1U << Val))
+ break;
+
+ Addr.setShift(Val);
+ Addr.setExtendType(AArch64_AM::LSL);
+
+ if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+ U = I;
+
+ if (const auto *ZE = dyn_cast<ZExtInst>(U))
+ if (ZE->getOperand(0)->getType()->isIntegerTy(32))
+ Addr.setExtendType(AArch64_AM::UXTW);
+
+ if (const auto *SE = dyn_cast<SExtInst>(U))
+ if (SE->getOperand(0)->getType()->isIntegerTy(32))
+ Addr.setExtendType(AArch64_AM::SXTW);
+
+ unsigned Reg = getRegForValue(U->getOperand(0));
+ if (!Reg)
+ return false;
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+ break;
+ }
+
+ if (Addr.getReg()) {
+ if (!Addr.getOffsetReg()) {
+ unsigned Reg = getRegForValue(Obj);
+ if (!Reg)
+ return false;
+ Addr.setOffsetReg(Reg);
+ return true;
+ }
+ return false;
+ }
+
+ unsigned Reg = getRegForValue(Obj);
+ if (!Reg)
+ return false;
+ Addr.setReg(Reg);
+ return true;
+}
+
+bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
+ const User *U = nullptr;
+ unsigned Opcode = Instruction::UserOp1;
+ bool InMBB = true;
+
+ if (const auto *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+ U = I;
+ InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
+ } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+    // Look past the bitcast if its operand is in the same BB.
+ if (InMBB)
+ return ComputeCallAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::IntToPtr:
+    // Look past a no-op inttoptr if its operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return ComputeCallAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::PtrToInt:
+    // Look past a no-op ptrtoint if its operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return ComputeCallAddress(U->getOperand(0), Addr);
+ break;
+ }
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ Addr.setGlobalValue(GV);
+ return true;
}
- // Try to get this in a register if nothing else has worked.
- if (!Addr.isValid())
- Addr.setReg(getRegForValue(Obj));
- return Addr.isValid();
+ // If all else fails, try to materialize the value in a register.
+ if (!Addr.getGlobalValue()) {
+ Addr.setReg(getRegForValue(V));
+ return Addr.getReg() != 0;
+ }
+
+ return false;
}
+
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT evt = TLI.getValueType(Ty, true);
return false;
}
-bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
- int64_t ScaleFactor, bool UseUnscaled) {
- bool needsLowering = false;
- int64_t Offset = Addr.getOffset();
+bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
+ unsigned ScaleFactor;
switch (VT.SimpleTy) {
- default:
- return false;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- case MVT::f32:
- case MVT::f64:
- if (!UseUnscaled)
- // Using scaled, 12-bit, unsigned immediate offsets.
- needsLowering = ((Offset & 0xfff) != Offset);
- else
- // Using unscaled, 9-bit, signed immediate offsets.
- needsLowering = (Offset > 256 || Offset < -256);
- break;
+ default: return false;
+ case MVT::i1: // fall-through
+ case MVT::i8: ScaleFactor = 1; break;
+ case MVT::i16: ScaleFactor = 2; break;
+ case MVT::i32: // fall-through
+ case MVT::f32: ScaleFactor = 4; break;
+ case MVT::i64: // fall-through
+ case MVT::f64: ScaleFactor = 8; break;
}
- // FIXME: If this is a stack pointer and the offset needs to be simplified
- // then put the alloca address into a register, set the base type back to
- // register and continue. This should almost never happen.
- if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
- return false;
+ bool ImmediateOffsetNeedsLowering = false;
+ bool RegisterOffsetNeedsLowering = false;
+ int64_t Offset = Addr.getOffset();
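+  // A negative or unaligned offset has to fit into a signed 9-bit unscaled
+  // immediate; a positive, aligned offset has to fit into an unsigned 12-bit
+  // scaled immediate.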
+ if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
+ ImmediateOffsetNeedsLowering = true;
+ else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
+ !isUInt<12>(Offset / ScaleFactor))
+ ImmediateOffsetNeedsLowering = true;
+
+ // Cannot encode an offset register and an immediate offset in the same
+ // instruction. Fold the immediate offset into the load/store instruction and
+  // emit an additional add to take care of the offset register.
+ if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
+ Addr.getOffsetReg())
+ RegisterOffsetNeedsLowering = true;
+
+ // If this is a stack pointer and the offset needs to be simplified then put
+ // the alloca address into a register, set the base type back to register and
+ // continue. This should almost never happen.
+ if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
+ unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
+ ResultReg)
+ .addFrameIndex(Addr.getFI())
+ .addImm(0)
+ .addImm(0);
+ Addr.setKind(Address::RegBase);
+ Addr.setReg(ResultReg);
+ }
+
+ if (RegisterOffsetNeedsLowering) {
+ unsigned ResultReg = 0;
+ if (Addr.getReg()) {
+ ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::ADDXrs), ResultReg)
+ .addReg(Addr.getReg())
+ .addReg(Addr.getOffsetReg())
+ .addImm(Addr.getShift());
+ } else
+ ResultReg = Emit_LSL_ri(MVT::i64, Addr.getOffsetReg(),
+ /*Op0IsKill=*/false, Addr.getShift());
+ if (!ResultReg)
+ return false;
+
+ Addr.setReg(ResultReg);
+ Addr.setOffsetReg(0);
+ Addr.setShift(0);
}
// Since the offset is too large for the load/store instruction get the
// reg+offset into a register.
- if (needsLowering) {
- uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
- unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
- UnscaledOffset, MVT::i64);
- if (ResultReg == 0)
+ if (ImmediateOffsetNeedsLowering) {
+ unsigned ResultReg = 0;
+ if (Addr.getReg())
+ ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(),
+ /*IsKill=*/false, Offset, MVT::i64);
+ else
+ ResultReg = FastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
+
+ if (!ResultReg)
return false;
Addr.setReg(ResultReg);
Addr.setOffset(0);
void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
const MachineInstrBuilder &MIB,
- unsigned Flags, bool UseUnscaled) {
- int64_t Offset = Addr.getOffset();
+ unsigned Flags,
+ unsigned ScaleFactor,
+ MachineMemOperand *MMO) {
+ int64_t Offset = Addr.getOffset() / ScaleFactor;
// Frame base works a bit differently. Handle it separately.
- if (Addr.getKind() == Address::FrameIndexBase) {
+ if (Addr.isFIBase()) {
int FI = Addr.getFI();
// FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
// and alignment should be based on the VT.
- MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI, Offset), Flags,
- MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI, Offset), Flags,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
// Now add the rest of the operands.
- MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
+ MIB.addFrameIndex(FI).addImm(Offset);
} else {
- // Now add the rest of the operands.
- MIB.addReg(Addr.getReg());
- MIB.addImm(Offset);
+ assert(Addr.isRegBase() && "Unexpected address kind.");
+ if (Addr.getOffsetReg()) {
+ assert(Addr.getOffset() == 0 && "Unexpected offset");
+ bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
+ Addr.getExtendType() == AArch64_AM::SXTX;
+ MIB.addReg(Addr.getReg());
+ MIB.addReg(Addr.getOffsetReg());
+ MIB.addImm(IsSigned);
+ MIB.addImm(Addr.getShift() != 0);
+ } else {
+ MIB.addReg(Addr.getReg());
+ MIB.addImm(Offset);
+ }
}
+
+ if (MMO)
+ MIB.addMemOperand(MMO);
}
bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
- bool UseUnscaled) {
+ MachineMemOperand *MMO) {
+ // Simplify this down to something we can handle.
+ if (!SimplifyAddress(Addr, VT))
+ return false;
+
+ unsigned ScaleFactor;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type.");
+ case MVT::i1: // fall-through
+ case MVT::i8: ScaleFactor = 1; break;
+ case MVT::i16: ScaleFactor = 2; break;
+ case MVT::i32: // fall-through
+ case MVT::f32: ScaleFactor = 4; break;
+ case MVT::i64: // fall-through
+ case MVT::f64: ScaleFactor = 8; break;
+ }
+
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
- if (!UseUnscaled && Addr.getOffset() < 0)
- UseUnscaled = true;
+ bool UseScaled = true;
+ if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
+ UseScaled = false;
+ ScaleFactor = 1;
+ }
+
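+  // Opcode table indexed by addressing mode (unscaled immediate, scaled
+  // immediate, X register offset, extended W register offset) and value type.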
+ static const unsigned OpcTable[4][6] = {
+ { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, AArch64::LDURXi,
+ AArch64::LDURSi, AArch64::LDURDi },
+ { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, AArch64::LDRXui,
+ AArch64::LDRSui, AArch64::LDRDui },
+ { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
+ AArch64::LDRSroX, AArch64::LDRDroX },
+ { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
+ AArch64::LDRSroW, AArch64::LDRDroW }
+ };
unsigned Opc;
const TargetRegisterClass *RC;
bool VTIsi1 = false;
- int64_t ScaleFactor = 0;
- switch (VT.SimpleTy) {
- default:
- return false;
- case MVT::i1:
- VTIsi1 = true;
- // Intentional fall-through.
- case MVT::i8:
- Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
- RC = &AArch64::GPR32RegClass;
- ScaleFactor = 1;
- break;
- case MVT::i16:
- Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
- RC = &AArch64::GPR32RegClass;
- ScaleFactor = 2;
- break;
- case MVT::i32:
- Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
- RC = &AArch64::GPR32RegClass;
- ScaleFactor = 4;
- break;
- case MVT::i64:
- Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
- RC = &AArch64::GPR64RegClass;
- ScaleFactor = 8;
- break;
- case MVT::f32:
- Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
- RC = TLI.getRegClassFor(VT);
- ScaleFactor = 4;
- break;
- case MVT::f64:
- Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
- RC = TLI.getRegClassFor(VT);
- ScaleFactor = 8;
- break;
- }
- // Scale the offset.
- if (!UseUnscaled) {
- int64_t Offset = Addr.getOffset();
- if (Offset & (ScaleFactor - 1))
- // Retry using an unscaled, 9-bit, signed immediate offset.
- return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true);
+ bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
+ Addr.getOffsetReg();
+ unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
+ if (Addr.getExtendType() == AArch64_AM::UXTW ||
+ Addr.getExtendType() == AArch64_AM::SXTW)
+ Idx++;
- Addr.setOffset(Offset / ScaleFactor);
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type.");
+ case MVT::i1: VTIsi1 = true; // Intentional fall-through.
+ case MVT::i8: Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
+ case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
+ case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
+ case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
+ case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
+ case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
}
- // Simplify this down to something we can handle.
- if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
- return false;
-
// Create the base instruction, then add the operands.
ResultReg = createResultReg(RC);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg);
- AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);
+ AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
// Loading an i1 requires special handling.
if (VTIsi1) {
unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
ANDReg)
- .addReg(ResultReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+ .addReg(ResultReg)
+ .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
ResultReg = ANDReg;
}
return true;
// See if we can handle this address.
Address Addr;
- if (!ComputeAddress(I->getOperand(0), Addr))
+ if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
return false;
unsigned ResultReg;
- if (!EmitLoad(VT, ResultReg, Addr))
+ if (!EmitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
return false;
UpdateValueMap(I, ResultReg);
}
bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
- bool UseUnscaled) {
- // Negative offsets require unscaled, 9-bit, signed immediate offsets.
- // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
- if (!UseUnscaled && Addr.getOffset() < 0)
- UseUnscaled = true;
+ MachineMemOperand *MMO) {
+ // Simplify this down to something we can handle.
+ if (!SimplifyAddress(Addr, VT))
+ return false;
- unsigned StrOpc;
- bool VTIsi1 = false;
- int64_t ScaleFactor = 0;
- // Using scaled, 12-bit, unsigned immediate offsets.
+ unsigned ScaleFactor;
switch (VT.SimpleTy) {
- default:
- return false;
- case MVT::i1:
- VTIsi1 = true;
- case MVT::i8:
- StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
- ScaleFactor = 1;
- break;
- case MVT::i16:
- StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
- ScaleFactor = 2;
- break;
- case MVT::i32:
- StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
- ScaleFactor = 4;
- break;
- case MVT::i64:
- StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
- ScaleFactor = 8;
- break;
- case MVT::f32:
- StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
- ScaleFactor = 4;
- break;
- case MVT::f64:
- StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
- ScaleFactor = 8;
- break;
+ default: llvm_unreachable("Unexpected value type.");
+ case MVT::i1: // fall-through
+ case MVT::i8: ScaleFactor = 1; break;
+ case MVT::i16: ScaleFactor = 2; break;
+ case MVT::i32: // fall-through
+ case MVT::f32: ScaleFactor = 4; break;
+ case MVT::i64: // fall-through
+ case MVT::f64: ScaleFactor = 8; break;
}
- // Scale the offset.
- if (!UseUnscaled) {
- int64_t Offset = Addr.getOffset();
- if (Offset & (ScaleFactor - 1))
- // Retry using an unscaled, 9-bit, signed immediate offset.
- return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true);
- Addr.setOffset(Offset / ScaleFactor);
+ // Negative offsets require unscaled, 9-bit, signed immediate offsets.
+ // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
+ bool UseScaled = true;
+ if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
+ UseScaled = false;
+ ScaleFactor = 1;
}
- // Simplify this down to something we can handle.
- if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
- return false;
+
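+  // Opcode table with the same layout as for loads: rows select the addressing
+  // mode, columns select the value type.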
+ static const unsigned OpcTable[4][6] = {
+ { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
+ AArch64::STURSi, AArch64::STURDi },
+ { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
+ AArch64::STRSui, AArch64::STRDui },
+ { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
+ AArch64::STRSroX, AArch64::STRDroX },
+ { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
+ AArch64::STRSroW, AArch64::STRDroW }
+
+ };
+
+ unsigned Opc;
+ bool VTIsi1 = false;
+ bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
+ Addr.getOffsetReg();
+ unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
+ if (Addr.getExtendType() == AArch64_AM::UXTW ||
+ Addr.getExtendType() == AArch64_AM::SXTW)
+ Idx++;
+
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type.");
+  case MVT::i1: VTIsi1 = true; // Intentional fall-through.
+ case MVT::i8: Opc = OpcTable[Idx][0]; break;
+ case MVT::i16: Opc = OpcTable[Idx][1]; break;
+ case MVT::i32: Opc = OpcTable[Idx][2]; break;
+ case MVT::i64: Opc = OpcTable[Idx][3]; break;
+ case MVT::f32: Opc = OpcTable[Idx][4]; break;
+ case MVT::f64: Opc = OpcTable[Idx][5]; break;
+ }
// Storing an i1 requires special handling.
if (VTIsi1) {
unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
ANDReg)
- .addReg(SrcReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+ .addReg(SrcReg)
+ .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
SrcReg = ANDReg;
}
// Create the base instruction, then add the operands.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(StrOpc)).addReg(SrcReg);
- AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
+ TII.get(Opc))
+ .addReg(SrcReg);
+ AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
+
return true;
}
// See if we can handle this address.
Address Addr;
- if (!ComputeAddress(I->getOperand(1), Addr))
+ if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
return false;
- if (!EmitStore(VT, SrcReg, Addr))
+ if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
return false;
return true;
}
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+ AArch64CC::CondCode CC = AArch64CC::NE;
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
// We may not handle every CC for now.
- AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
+ CC = getCompareCC(CI->getPredicate());
if (CC == AArch64CC::AL)
return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
- FuncInfo.MBB->addSuccessor(TBB);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
FastEmitBranch(FBB, DbgLoc);
return true;
.addImm(0)
.addImm(0);
- unsigned CC = AArch64CC::NE;
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
CC = AArch64CC::EQ;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
- FuncInfo.MBB->addSuccessor(TBB);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+
FastEmitBranch(FBB, DbgLoc);
return true;
}
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
.addMBB(Target);
- FuncInfo.MBB->addSuccessor(Target);
+
+ // Obtain the branch weight and add the target to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ Target->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(Target, BranchWeight);
+ return true;
+ } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
+    // Fake-request the condition; otherwise the intrinsic might be completely
+    // optimized away.
+ unsigned CondReg = getRegForValue(BI->getCondition());
+ if (!CondReg)
+ return false;
+
+ // Emit the branch.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
+ .addImm(CC)
+ .addMBB(TBB);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+
+ FastEmitBranch(FBB, DbgLoc);
return true;
}
.addImm(0)
.addImm(0);
- unsigned CC = AArch64CC::NE;
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
CC = AArch64CC::EQ;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
- FuncInfo.MBB->addSuccessor(TBB);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+
FastEmitBranch(FBB, DbgLoc);
return true;
}
DestVT != MVT::f64)
return false;
- unsigned CondReg = getRegForValue(SI->getCondition());
- if (CondReg == 0)
- return false;
- unsigned TrueReg = getRegForValue(SI->getTrueValue());
- if (TrueReg == 0)
- return false;
- unsigned FalseReg = getRegForValue(SI->getFalseValue());
- if (FalseReg == 0)
- return false;
+ unsigned SelectOpc;
+ switch (DestVT.SimpleTy) {
+ default: return false;
+ case MVT::i32: SelectOpc = AArch64::CSELWr; break;
+ case MVT::i64: SelectOpc = AArch64::CSELXr; break;
+ case MVT::f32: SelectOpc = AArch64::FCSELSrrr; break;
+ case MVT::f64: SelectOpc = AArch64::FCSELDrrr; break;
+ }
+ const Value *Cond = SI->getCondition();
+ bool NeedTest = true;
+ AArch64CC::CondCode CC = AArch64CC::NE;
+ if (foldXALUIntrinsic(CC, I, Cond))
+ NeedTest = false;
- MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
- unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
- ANDReg)
- .addReg(CondReg)
+ unsigned CondReg = getRegForValue(Cond);
+ if (!CondReg)
+ return false;
+ bool CondIsKill = hasTrivialKill(Cond);
+
+ if (NeedTest) {
+ MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
+ unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
+ ANDReg)
+ .addReg(CondReg, getKillRegState(CondIsKill))
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
.addReg(ANDReg)
.addReg(ANDReg)
.addImm(0)
.addImm(0);
+ }
- unsigned SelectOpc;
- switch (DestVT.SimpleTy) {
- default:
+ unsigned TrueReg = getRegForValue(SI->getTrueValue());
+ bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
+
+ unsigned FalseReg = getRegForValue(SI->getFalseValue());
+ bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
+
+ if (!TrueReg || !FalseReg)
return false;
- case MVT::i32:
- SelectOpc = AArch64::CSELWr;
- break;
- case MVT::i64:
- SelectOpc = AArch64::CSELXr;
- break;
- case MVT::f32:
- SelectOpc = AArch64::FCSELSrrr;
- break;
- case MVT::f64:
- SelectOpc = AArch64::FCSELDrrr;
- break;
- }
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
ResultReg)
- .addReg(TrueReg)
- .addReg(FalseReg)
- .addImm(AArch64CC::NE);
+ .addReg(TrueReg, getKillRegState(TrueIsKill))
+ .addReg(FalseReg, getKillRegState(FalseIsKill))
+ .addImm(CC);
UpdateValueMap(I, ResultReg);
return true;
return true;
}
-bool AArch64FastISel::ProcessCallArgs(
- SmallVectorImpl<Value *> &Args, SmallVectorImpl<unsigned> &ArgRegs,
- SmallVectorImpl<MVT> &ArgVTs, SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
- SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
- unsigned &NumBytes) {
+bool AArch64FastISel::FastLowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const Function *F = FuncInfo.Fn;
+ if (F->isVarArg())
+ return false;
+
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC != CallingConv::C)
+ return false;
+
+  // Only handle simple cases (i1/i8/i16/i32/i64/f32/f64) that use at most
+  // 8 GPR and 8 FPR argument registers.
+ unsigned GPRCnt = 0;
+ unsigned FPRCnt = 0;
+ unsigned Idx = 0;
+ for (auto const &Arg : F->args()) {
+ // The first argument is at index 1.
+ ++Idx;
+ if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::Nest))
+ return false;
+
+ Type *ArgTy = Arg.getType();
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+ return false;
+
+ EVT ArgVT = TLI.getValueType(ArgTy);
+ if (!ArgVT.isSimple()) return false;
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ ++GPRCnt;
+ break;
+ case MVT::f16:
+ case MVT::f32:
+ case MVT::f64:
+ ++FPRCnt;
+ break;
+ }
+
+ if (GPRCnt > 8 || FPRCnt > 8)
+ return false;
+ }
+
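+  // Argument registers by bank: W/X for GPR arguments, H/S/D for FPR
+  // arguments, indexed by argument position.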
+ static const MCPhysReg Registers[5][8] = {
+ { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
+ AArch64::W5, AArch64::W6, AArch64::W7 },
+ { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
+ AArch64::X5, AArch64::X6, AArch64::X7 },
+ { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
+ AArch64::H5, AArch64::H6, AArch64::H7 },
+ { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
+ AArch64::S5, AArch64::S6, AArch64::S7 },
+ { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
+ AArch64::D5, AArch64::D6, AArch64::D7 }
+ };
+
+ unsigned GPRIdx = 0;
+ unsigned FPRIdx = 0;
+ for (auto const &Arg : F->args()) {
+ MVT VT = TLI.getSimpleValueType(Arg.getType());
+ unsigned SrcReg;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type.");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16: VT = MVT::i32; // fall-through
+ case MVT::i32: SrcReg = Registers[0][GPRIdx++]; break;
+ case MVT::i64: SrcReg = Registers[1][GPRIdx++]; break;
+ case MVT::f16: SrcReg = Registers[2][FPRIdx++]; break;
+ case MVT::f32: SrcReg = Registers[3][FPRIdx++]; break;
+ case MVT::f64: SrcReg = Registers[4][FPRIdx++]; break;
+ }
+
+ // Skip unused arguments.
+ if (Arg.use_empty()) {
+ UpdateValueMap(&Arg, 0);
+ continue;
+ }
+
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+ // Without this, EmitLiveInCopies may eliminate the livein if its only
+ // use is a bitcast (which isn't turned into an instruction).
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(DstReg, getKillRegState(true));
+ UpdateValueMap(&Arg, ResultReg);
+ }
+ return true;
+}
+
+bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
+ SmallVectorImpl<MVT> &OutVTs,
+ unsigned &NumBytes) {
+ CallingConv::ID CC = CLI.CallConv;
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
- CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
+ CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
// Get a count of how many bytes are to be pushed on the stack.
NumBytes = CCInfo.getNextStackOffset();
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
- .addImm(NumBytes);
+ .addImm(NumBytes);
// Process the args.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- unsigned Arg = ArgRegs[VA.getValNo()];
- MVT ArgVT = ArgVTs[VA.getValNo()];
+ const Value *ArgVal = CLI.OutVals[VA.getValNo()];
+ MVT ArgVT = OutVTs[VA.getValNo()];
+
+ unsigned ArgReg = getRegForValue(ArgVal);
+ if (!ArgReg)
+ return false;
// Handle arg promotion: SExt, ZExt, AExt.
switch (VA.getLocInfo()) {
case CCValAssign::SExt: {
MVT DestVT = VA.getLocVT();
MVT SrcVT = ArgVT;
- Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false);
- if (Arg == 0)
+ ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
+ if (!ArgReg)
return false;
break;
}
case CCValAssign::ZExt: {
MVT DestVT = VA.getLocVT();
MVT SrcVT = ArgVT;
- Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true);
- if (Arg == 0)
+ ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
+ if (!ArgReg)
return false;
break;
}
// Now copy/store arg to correct locations.
if (VA.isRegLoc() && !VA.needsCustom()) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
- RegArgs.push_back(VA.getLocReg());
+ TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
+ CLI.OutRegs.push_back(VA.getLocReg());
} else if (VA.needsCustom()) {
// FIXME: Handle custom args.
return false;
} else {
assert(VA.isMemLoc() && "Assuming store on stack.");
+ // Don't emit stores for undef values.
+ if (isa<UndefValue>(ArgVal))
+ continue;
+
// Need to store on the stack.
unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
Addr.setReg(AArch64::SP);
Addr.setOffset(VA.getLocMemOffset() + BEAlign);
- if (!EmitStore(ArgVT, Arg, Addr))
+ unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getStack(Addr.getOffset()),
+ MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
+
+ if (!EmitStore(ArgVT, ArgReg, Addr, MMO))
return false;
}
}
return true;
}
-bool AArch64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
- const Instruction *I, CallingConv::ID CC,
- unsigned &NumBytes) {
+bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, MVT RetVT,
+ unsigned NumBytes) {
+ CallingConv::ID CC = CLI.CallConv;
+
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
- .addImm(NumBytes)
- .addImm(0);
+ .addImm(NumBytes).addImm(0);
// Now the return value.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
// Only handle a single return value.
MVT CopyVT = RVLocs[0].getValVT();
unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY),
- ResultReg).addReg(RVLocs[0].getLocReg());
- UsedRegs.push_back(RVLocs[0].getLocReg());
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(RVLocs[0].getLocReg());
+ CLI.InRegs.push_back(RVLocs[0].getLocReg());
- // Finally update the result.
- UpdateValueMap(I, ResultReg);
+ CLI.ResultReg = ResultReg;
+ CLI.NumResultRegs = 1;
}
return true;
}
-bool AArch64FastISel::SelectCall(const Instruction *I,
- const char *IntrMemName = nullptr) {
- const CallInst *CI = cast<CallInst>(I);
- const Value *Callee = CI->getCalledValue();
+bool AArch64FastISel::FastLowerCall(CallLoweringInfo &CLI) {
+ CallingConv::ID CC = CLI.CallConv;
+ bool IsTailCall = CLI.IsTailCall;
+ bool IsVarArg = CLI.IsVarArg;
+ const Value *Callee = CLI.Callee;
+ const char *SymName = CLI.SymName;
- // Don't handle inline asm or intrinsics.
- if (isa<InlineAsm>(Callee))
+ // Allow SelectionDAG isel to handle tail calls.
+ if (IsTailCall)
return false;
- // Only handle global variable Callees.
- const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
- if (!GV)
+ CodeModel::Model CM = TM.getCodeModel();
+ // Only support the small and large code model.
+ if (CM != CodeModel::Small && CM != CodeModel::Large)
return false;
- // Check the calling convention.
- ImmutableCallSite CS(CI);
- CallingConv::ID CC = CS.getCallingConv();
+ // FIXME: Add large code model support for ELF.
+ if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
+ return false;
// Let SDISel handle vararg functions.
- PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- if (FTy->isVarArg())
+ if (IsVarArg)
return false;
- // Handle *simple* calls for now.
+ // FIXME: Only handle *simple* calls for now.
MVT RetVT;
- Type *RetTy = I->getType();
- if (RetTy->isVoidTy())
+ if (CLI.RetTy->isVoidTy())
RetVT = MVT::isVoid;
- else if (!isTypeLegal(RetTy, RetVT))
+ else if (!isTypeLegal(CLI.RetTy, RetVT))
return false;
- // Set up the argument vectors.
- SmallVector<Value *, 8> Args;
- SmallVector<unsigned, 8> ArgRegs;
- SmallVector<MVT, 8> ArgVTs;
- SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
- Args.reserve(CS.arg_size());
- ArgRegs.reserve(CS.arg_size());
- ArgVTs.reserve(CS.arg_size());
- ArgFlags.reserve(CS.arg_size());
-
- for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i) {
- // If we're lowering a memory intrinsic instead of a regular call, skip the
- // last two arguments, which shouldn't be passed to the underlying function.
- if (IntrMemName && e - i <= 2)
- break;
-
- unsigned Arg = getRegForValue(*i);
- if (Arg == 0)
+ for (auto Flag : CLI.OutFlags)
+ if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
return false;
- ISD::ArgFlagsTy Flags;
- unsigned AttrInd = i - CS.arg_begin() + 1;
- if (CS.paramHasAttr(AttrInd, Attribute::SExt))
- Flags.setSExt();
- if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
- Flags.setZExt();
-
- // FIXME: Only handle *easy* calls for now.
- if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
- CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
- CS.paramHasAttr(AttrInd, Attribute::Nest) ||
- CS.paramHasAttr(AttrInd, Attribute::ByVal))
- return false;
+ // Set up the argument vectors.
+ SmallVector<MVT, 16> OutVTs;
+ OutVTs.reserve(CLI.OutVals.size());
- MVT ArgVT;
- Type *ArgTy = (*i)->getType();
- if (!isTypeLegal(ArgTy, ArgVT) &&
- !(ArgVT == MVT::i1 || ArgVT == MVT::i8 || ArgVT == MVT::i16))
+ for (auto *Val : CLI.OutVals) {
+ MVT VT;
+ if (!isTypeLegal(Val->getType(), VT) &&
+ !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
return false;
// We don't handle vector parameters yet.
- if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+ if (VT.isVector() || VT.getSizeInBits() > 64)
return false;
- unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
- Flags.setOrigAlign(OriginalAlignment);
-
- Args.push_back(*i);
- ArgRegs.push_back(Arg);
- ArgVTs.push_back(ArgVT);
- ArgFlags.push_back(Flags);
+ OutVTs.push_back(VT);
}
+ Address Addr;
+ if (!ComputeCallAddress(Callee, Addr))
+ return false;
+
// Handle the arguments now that we've gotten them.
- SmallVector<unsigned, 4> RegArgs;
unsigned NumBytes;
- if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
return false;
// Issue the call.
MachineInstrBuilder MIB;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BL));
- if (!IntrMemName)
- MIB.addGlobalAddress(GV, 0, 0);
- else
- MIB.addExternalSymbol(IntrMemName, 0);
+ if (CM == CodeModel::Small) {
+ unsigned CallOpc = Addr.getReg() ? AArch64::BLR : AArch64::BL;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
+ if (SymName)
+ MIB.addExternalSymbol(SymName, 0);
+ else if (Addr.getGlobalValue())
+ MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
+ else if (Addr.getReg())
+ MIB.addReg(Addr.getReg());
+ else
+ return false;
+ } else {
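+    // In the large code model a direct BL may not reach the callee, so
+    // materialize the callee address in a register and use an indirect BLR.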
+ unsigned CallReg = 0;
+ if (SymName) {
+ unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
+ ADRPReg)
+ .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
+
+ CallReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
+ CallReg)
+ .addReg(ADRPReg)
+ .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ } else if (Addr.getGlobalValue()) {
+ CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
+ } else if (Addr.getReg())
+ CallReg = Addr.getReg();
+
+ if (!CallReg)
+ return false;
+
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::BLR)).addReg(CallReg);
+ }
// Add implicit physical register uses to the call.
- for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
- MIB.addReg(RegArgs[i], RegState::Implicit);
+ for (auto Reg : CLI.OutRegs)
+ MIB.addReg(Reg, RegState::Implicit);
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
-
- // Finish off the call including any return values.
- SmallVector<unsigned, 4> UsedRegs;
- if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes))
- return false;
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
- // Set all unused physreg defs as dead.
- static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+ CLI.Call = MIB;
- return true;
+ // Finish off the call including any return values.
+ return FinishCall(CLI, RetVT, NumBytes);
}
bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
bool RV;
unsigned ResultReg;
RV = EmitLoad(VT, ResultReg, Src);
- assert(RV == true && "Should be able to handle this load.");
+ if (!RV)
+ return false;
+
RV = EmitStore(VT, ResultReg, Dest);
- assert(RV == true && "Should be able to handle this store.");
- (void)RV;
+ if (!RV)
+ return false;
int64_t Size = VT.getSizeInBits() / 8;
Len -= Size;
return true;
}
-bool AArch64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
- // FIXME: Handle more intrinsics.
- switch (I.getIntrinsicID()) {
- default:
+/// \brief Check if it is possible to fold the condition from the XALU intrinsic
+/// into the user. The condition code will only be updated on success.
+bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
+ const Instruction *I,
+ const Value *Cond) {
+ if (!isa<ExtractValueInst>(Cond))
+ return false;
+
+ const auto *EV = cast<ExtractValueInst>(Cond);
+ if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
return false;
+
+ const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
+ MVT RetVT;
+ const Function *Callee = II->getCalledFunction();
+ Type *RetTy =
+ cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
+ if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return false;
+
+ AArch64CC::CondCode TmpCC;
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
+ case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
+ case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
+ }
+
+ // Check if both instructions are in the same basic block.
+ if (II->getParent() != I->getParent())
+ return false;
+
+ // Make sure nothing is in the way
+ BasicBlock::const_iterator Start = I;
+ BasicBlock::const_iterator End = II;
+ for (auto Itr = std::prev(Start); Itr != End; --Itr) {
+ // We only expect extractvalue instructions between the intrinsic and the
+ // instruction to be selected.
+ if (!isa<ExtractValueInst>(Itr))
+ return false;
+
+ // Check that the extractvalue operand comes from the intrinsic.
+ const auto *EVI = cast<ExtractValueInst>(Itr);
+ if (EVI->getAggregateOperand() != II)
+ return false;
+ }
+
+ CC = TmpCC;
+ return true;
+}
+
+bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
+ // FIXME: Handle more intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::frameaddress: {
+ MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ const AArch64RegisterInfo *RegInfo =
+ static_cast<const AArch64RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
+ unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
+ unsigned SrcReg = FramePtr;
+
+ // Recursively load frame address
+ // ldr x0, [fp]
+ // ldr x0, [x0]
+ // ldr x0, [x0]
+ // ...
+ unsigned DestReg;
+ unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
+ while (Depth--) {
+ DestReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::LDRXui), DestReg)
+ .addReg(SrcReg).addImm(0);
+ SrcReg = DestReg;
+ }
+
+ UpdateValueMap(II, SrcReg);
+ return true;
+ }
case Intrinsic::memcpy:
case Intrinsic::memmove: {
- const MemTransferInst &MTI = cast<MemTransferInst>(I);
+ const auto *MTI = cast<MemTransferInst>(II);
// Don't handle volatile.
- if (MTI.isVolatile())
+ if (MTI->isVolatile())
return false;
// Disable inlining for memmove before calls to ComputeAddress. Otherwise,
// we would emit dead code because we don't currently handle memmoves.
- bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
- if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
+ bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
+ if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
// Small memcpy's are common enough that we want to do them without a call
// if possible.
- uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
- unsigned Alignment = MTI.getAlignment();
+ uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
+ unsigned Alignment = MTI->getAlignment();
if (IsMemCpySmall(Len, Alignment)) {
Address Dest, Src;
- if (!ComputeAddress(MTI.getRawDest(), Dest) ||
- !ComputeAddress(MTI.getRawSource(), Src))
+ if (!ComputeAddress(MTI->getRawDest(), Dest) ||
+ !ComputeAddress(MTI->getRawSource(), Src))
return false;
if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
return true;
}
}
- if (!MTI.getLength()->getType()->isIntegerTy(64))
+ if (!MTI->getLength()->getType()->isIntegerTy(64))
return false;
- if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
+ if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
- const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
- return SelectCall(&I, IntrMemName);
+ const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
+ return LowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
}
case Intrinsic::memset: {
- const MemSetInst &MSI = cast<MemSetInst>(I);
+ const MemSetInst *MSI = cast<MemSetInst>(II);
// Don't handle volatile.
- if (MSI.isVolatile())
+ if (MSI->isVolatile())
return false;
- if (!MSI.getLength()->getType()->isIntegerTy(64))
+ if (!MSI->getLength()->getType()->isIntegerTy(64))
return false;
- if (MSI.getDestAddressSpace() > 255)
+ if (MSI->getDestAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
- return SelectCall(&I, "memset");
+ return LowerCallTo(II, "memset", II->getNumArgOperands() - 2);
}
case Intrinsic::trap: {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
.addImm(1);
return true;
}
+ case Intrinsic::sqrt: {
+ Type *RetTy = II->getCalledFunction()->getReturnType();
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ unsigned Op0Reg = getRegForValue(II->getOperand(0));
+ if (!Op0Reg)
+ return false;
+ bool Op0IsKill = hasTrivialKill(II->getOperand(0));
+
+ unsigned ResultReg = FastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(II, ResultReg);
+ return true;
+ }
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ // This implements the basic lowering of the xalu with overflow intrinsics.
+ const Function *Callee = II->getCalledFunction();
+ auto *Ty = cast<StructType>(Callee->getReturnType());
+ Type *RetTy = Ty->getTypeAtIndex(0U);
+ Type *CondTy = Ty->getTypeAtIndex(1);
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return false;
+
+ const Value *LHS = II->getArgOperand(0);
+ const Value *RHS = II->getArgOperand(1);
+ // Canonicalize immediate to the RHS.
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
+ isCommutativeIntrinsic(II))
+ std::swap(LHS, RHS);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ if (!LHSReg)
+ return false;
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ // Check if the immediate can be encoded in the instruction and if we should
+ // invert the instruction (adds -> subs) to handle negative immediates.
+ bool UseImm = false;
+ bool UseInverse = false;
+ uint64_t Imm = 0;
+ if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
+ if (C->isNegative()) {
+ UseInverse = true;
+ Imm = -(C->getSExtValue());
+ } else
+ Imm = C->getZExtValue();
+
+ if (isUInt<12>(Imm))
+ UseImm = true;
+
+ UseInverse = UseImm && UseInverse;
+ }
+
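+    // ADDS/SUBS opcodes indexed by [add vs. sub][register vs. immediate
+    // form][32-bit vs. 64-bit].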
+ static const unsigned OpcTable[2][2][2] = {
+ { {AArch64::ADDSWrr, AArch64::ADDSXrr},
+ {AArch64::ADDSWri, AArch64::ADDSXri} },
+ { {AArch64::SUBSWrr, AArch64::SUBSXrr},
+ {AArch64::SUBSWri, AArch64::SUBSXri} }
+ };
+ unsigned Opc = 0;
+ unsigned MulReg = 0;
+ unsigned RHSReg = 0;
+ bool RHSIsKill = false;
+ AArch64CC::CondCode CC = AArch64CC::Invalid;
+ bool Is64Bit = VT == MVT::i64;
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::sadd_with_overflow:
+ Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
+ case Intrinsic::uadd_with_overflow:
+ Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::HS; break;
+ case Intrinsic::ssub_with_overflow:
+ Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
+ case Intrinsic::usub_with_overflow:
+ Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::LO; break;
+ case Intrinsic::smul_with_overflow: {
+ CC = AArch64CC::NE;
+ RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return false;
+ RHSIsKill = hasTrivialKill(RHS);
+
+ if (VT == MVT::i32) {
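+ // Widen to a 64-bit SMULL. The multiply overflowed iff the high 32 bits of
+ // the product differ from the sign-extension (asr #31) of the low 32 bits.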
+ MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
+ MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
+ AArch64::sub_32);
+ ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
+ AArch64::sub_32);
+ unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBSWrs), CmpReg)
+ .addReg(ShiftReg, getKillRegState(true))
+ .addReg(MulReg, getKillRegState(false))
+ .addImm(159); // 159 <-> asr #31
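+ // The immediate above is the shifted-register modifier, as built by
+ // AArch64_AM::getShifterImm(AArch64_AM::ASR, 31): (2 << 6) | 31 == 159.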
+ } else {
+ assert(VT == MVT::i64 && "Unexpected value type.");
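+ // The multiply overflowed iff SMULH (the high 64 bits of the 128-bit
+ // product) differs from the sign bit (asr #63) of the low 64 bits.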
+ MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ unsigned SMULHReg = FastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
+ RHSReg, RHSIsKill);
+ unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBSXrs), CmpReg)
+ .addReg(SMULHReg, getKillRegState(true))
+ .addReg(MulReg, getKillRegState(false))
+ .addImm(191); // 191 <-> asr #63
+ }
+ break;
+ }
+ case Intrinsic::umul_with_overflow: {
+ CC = AArch64CC::NE;
+ RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return false;
+ RHSIsKill = hasTrivialKill(RHS);
+
+ if (VT == MVT::i32) {
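+ // Widen to a 64-bit UMULL. The multiply overflowed iff any of the high
+ // 32 bits (lsr #32) of the product are set.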
+ MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBSXrs), CmpReg)
+ .addReg(AArch64::XZR, getKillRegState(true))
+ .addReg(MulReg, getKillRegState(false))
+ .addImm(96); // 96 <-> lsr #32
+ MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
+ AArch64::sub_32);
+ } else {
+ assert(VT == MVT::i64 && "Unexpected value type.");
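+ // The multiply overflowed iff UMULH (the high 64 bits of the 128-bit
+ // product) is non-zero.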
+ MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
+ unsigned UMULHReg = FastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
+ RHSReg, RHSIsKill);
+ unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBSXrr), CmpReg)
+ .addReg(AArch64::XZR, getKillRegState(true))
+ .addReg(UMULHReg, getKillRegState(false));
+ }
+ break;
+ }
+ }
+
+ if (!UseImm) {
+ RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return false;
+ RHSIsKill = hasTrivialKill(RHS);
+ }
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ if (Opc) {
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+ ResultReg)
+ .addReg(LHSReg, getKillRegState(LHSIsKill));
+ if (UseImm) {
+ MIB.addImm(Imm);
+ MIB.addImm(0);
+ } else
+ MIB.addReg(RHSReg, getKillRegState(RHSIsKill));
+ } else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(MulReg);
+
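+ // Materialize the overflow bit: CSINC with the inverted condition yields
+ // 1 when the overflow condition CC holds and 0 otherwise.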
+ unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
+ assert((ResultReg + 1) == ResultReg2 && "Nonconsecutive result registers.");
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
+ ResultReg2)
+ .addReg(AArch64::WZR, getKillRegState(true))
+ .addReg(AArch64::WZR, getKillRegState(true))
+ .addImm(getInvertedCondCode(CC));
+
+ UpdateValueMap(II, ResultReg, 2);
+ return true;
+ }
}
return false;
}
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
- I->getContext());
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
: RetCC_AArch64_AAPCS;
CCInfo.AnalyzeReturn(Outs, RetCC);
}
}
+unsigned AArch64FastISel::Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
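+ // MUL is emitted as MADD with the zero register as the addend:
+ // MUL Rd, Rn, Rm == MADD Rd, Rn, Rm, WZR/XZR.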
+ unsigned Opc, ZReg;
+ switch (RetVT.SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ RetVT = MVT::i32;
+ Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
+ case MVT::i64:
+ Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
+ }
+
+ // Create the base instruction, then add the operands.
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(ZReg, getKillRegState(true));
+
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
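+ // SMULL (signed 32x32 -> 64-bit multiply) is emitted as SMADDL with XZR as
+ // the addend, so only a 64-bit result type is supported.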
+ if (RetVT != MVT::i64)
+ return 0;
+
+ // Create the base instruction, then add the operands.
+ unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SMADDLrrr),
+ ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(AArch64::XZR, getKillRegState(true));
+
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
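+ // UMULL (unsigned 32x32 -> 64-bit multiply) is emitted as UMADDL with XZR as
+ // the addend, so only a 64-bit result type is supported.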
+ if (RetVT != MVT::i64)
+ return 0;
+
+ // Create the base instruction, then add the operands.
+ unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::UMADDLrrr),
+ ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(AArch64::XZR, getKillRegState(true));
+
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ uint64_t Shift) {
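+ // LSL #s is emitted via its UBFM alias: UBFM #(-s mod regsize), #(width-1-s).
+ // For i8/i16 the smaller ImmS truncates the result to the narrow width.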
+ unsigned Opc, ImmR, ImmS;
+ switch (RetVT.SimpleTy) {
+ default: return 0;
+ case MVT::i8:
+ Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 7 - Shift; break;
+ case MVT::i16:
+ Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 15 - Shift; break;
+ case MVT::i32:
+ Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 31 - Shift; break;
+ case MVT::i64:
+ Opc = AArch64::UBFMXri; ImmR = -Shift % 64; ImmS = 63 - Shift; break;
+ }
+
+ RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
+ return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, ImmR,
+ ImmS);
+}
+
+unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ uint64_t Shift) {
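+ // LSR #s is emitted via UBFM #s, #(bit width - 1); for i8/i16 this assumes
+ // the bits above the narrow type are already zero.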
+ unsigned Opc, ImmS;
+ switch (RetVT.SimpleTy) {
+ default: return 0;
+ case MVT::i8: Opc = AArch64::UBFMWri; ImmS = 7; break;
+ case MVT::i16: Opc = AArch64::UBFMWri; ImmS = 15; break;
+ case MVT::i32: Opc = AArch64::UBFMWri; ImmS = 31; break;
+ case MVT::i64: Opc = AArch64::UBFMXri; ImmS = 63; break;
+ }
+
+ RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
+ return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
+ ImmS);
+}
+
+unsigned AArch64FastISel::Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
+ uint64_t Shift) {
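+ // ASR #s is emitted via its SBFM alias: SBFM #s, #(bit width - 1).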
+ unsigned Opc, ImmS;
+ switch (RetVT.SimpleTy) {
+ default: return 0;
+ case MVT::i8: Opc = AArch64::SBFMWri; ImmS = 7; break;
+ case MVT::i16: Opc = AArch64::SBFMWri; ImmS = 15; break;
+ case MVT::i32: Opc = AArch64::SBFMWri; ImmS = 31; break;
+ case MVT::i64: Opc = AArch64::SBFMXri; ImmS = 63; break;
+ }
+
+ RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
+ return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
+ ImmS);
+}
+
unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
bool isZExt) {
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
+
+ // FastISel does not have plumbing to deal with extensions where the SrcVT or
+ // DestVT are unusual types, so check that both are types we can handle
+ // (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT); otherwise bail out
+ // to SelectionDAG.
+ if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
+ (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
+ ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
+ (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
+ return 0;
+
unsigned Opc;
unsigned Imm = 0;
MVT SrcVT = SrcEVT.getSimpleVT();
MVT DestVT = DestEVT.getSimpleVT();
- unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
- if (ResultReg == 0)
+ unsigned ResultReg = 0;
+
+ // Check if it is an argument and if it is already zero/sign-extended.
+ if (const auto *Arg = dyn_cast<Argument>(Src)) {
+ if ((isZExt && Arg->hasZExtAttr()) || (!isZExt && Arg->hasSExtAttr())) {
+ if (DestVT == MVT::i64) {
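+ // Reuse the incoming register: wrap the 32-bit value in a 64-bit register
+ // with SUBREG_TO_REG instead of emitting another extend.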
+ ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), ResultReg)
+ .addImm(0)
+ .addReg(SrcReg)
+ .addImm(AArch64::sub_32);
+ } else
+ ResultReg = SrcReg;
+ }
+ }
+
+ if (!ResultReg)
+ ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
+
+ if (!ResultReg)
return false;
+
UpdateValueMap(I, ResultReg);
return true;
}
SrcVT != MVT::i8)
return false;
- unsigned Opc;
- unsigned ZReg;
- switch (SrcVT.SimpleTy) {
- default:
- return false;
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- ZReg = AArch64::WZR;
- Opc = AArch64::MADDWrrr;
- break;
- case MVT::i64:
- ZReg = AArch64::XZR;
- Opc = AArch64::MADDXrrr;
- break;
- }
-
unsigned Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
+ bool Src0IsKill = hasTrivialKill(I->getOperand(0));
unsigned Src1Reg = getRegForValue(I->getOperand(1));
if (!Src1Reg)
return false;
+ bool Src1IsKill = hasTrivialKill(I->getOperand(1));
+
+ unsigned ResultReg =
+ Emit_MUL_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
+
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool AArch64FastISel::SelectShift(const Instruction *I, bool IsLeftShift,
+ bool IsArithmetic) {
+ EVT RetEVT = TLI.getValueType(I->getType(), true);
+ if (!RetEVT.isSimple())
+ return false;
+ MVT RetVT = RetEVT.getSimpleVT();
+
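+ // Only constant shift amounts are handled here; variable shifts are left to
+ // SelectionDAG.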
+ if (!isa<ConstantInt>(I->getOperand(1)))
+ return false;
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (!Op0Reg)
+ return false;
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ uint64_t ShiftVal = cast<ConstantInt>(I->getOperand(1))->getZExtValue();
+
+ unsigned ResultReg;
+ if (IsLeftShift)
+ ResultReg = Emit_LSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
+ else {
+ if (IsArithmetic)
+ ResultReg = Emit_ASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
+ else
+ ResultReg = Emit_LSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
+ }
+
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool AArch64FastISel::SelectBitCast(const Instruction *I) {
+ MVT RetVT, SrcVT;
+
+ if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
+ return false;
+ if (!isTypeLegal(I->getType(), RetVT))
+ return false;
+
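+ // Only scalar int<->FP bitcasts that map directly onto an FMOV are handled.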
+ unsigned Opc;
+ if (RetVT == MVT::f32 && SrcVT == MVT::i32)
+ Opc = AArch64::FMOVWSr;
+ else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
+ Opc = AArch64::FMOVXDr;
+ else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
+ Opc = AArch64::FMOVSWr;
+ else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
+ Opc = AArch64::FMOVDXr;
+ else
+ return false;
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (!Op0Reg)
+ return false;
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+ unsigned ResultReg = FastEmitInst_r(Opc, TLI.getRegClassFor(RetVT),
+ Op0Reg, Op0IsKill);
+
+ if (!ResultReg)
+ return false;
- // Create the base instruction, then add the operands.
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(SrcVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(Src0Reg)
- .addReg(Src1Reg)
- .addReg(ZReg);
UpdateValueMap(I, ResultReg);
return true;
}
return SelectRem(I, ISD::SREM);
case Instruction::URem:
return SelectRem(I, ISD::UREM);
- case Instruction::Call:
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
- return SelectIntrinsicCall(*II);
- return SelectCall(I);
case Instruction::Ret:
return SelectRet(I);
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
return SelectIntExt(I);
+
+ // FIXME: All of these should really be handled by the target-independent
+ // selector; improve the FastISel tablegen support so they can be.
case Instruction::Mul:
- // FIXME: This really should be handled by the target-independent selector.
return SelectMul(I);
+ case Instruction::Shl:
+ return SelectShift(I, /*IsLeftShift=*/true, /*IsArithmetic=*/false);
+ case Instruction::LShr:
+ return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/false);
+ case Instruction::AShr:
+ return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/true);
+ case Instruction::BitCast:
+ return SelectBitCast(I);
}
return false;
// Silence warnings.