#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
int FI;
} Base;
int64_t Offset;
+ const GlobalValue *GV;
public:
- Address() : Kind(RegBase), Offset(0) { Base.Reg = 0; }
+ Address() : Kind(RegBase), Offset(0), GV(nullptr) { Base.Reg = 0; }
void setKind(BaseKind K) { Kind = K; }
BaseKind getKind() const { return Kind; }
bool isRegBase() const { return Kind == RegBase; }
void setOffset(int64_t O) { Offset = O; }
int64_t getOffset() { return Offset; }
+ void setGlobalValue(const GlobalValue *G) { GV = G; }
+ const GlobalValue *getGlobalValue() { return GV; }
+
bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
};
const AArch64Subtarget *Subtarget;
LLVMContext *Context;
+ bool FastLowerArguments() override;
bool FastLowerCall(CallLoweringInfo &CLI) override;
bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;
bool SelectIntExt(const Instruction *I);
bool SelectMul(const Instruction *I);
bool SelectShift(const Instruction *I, bool IsLeftShift, bool IsArithmetic);
+ bool SelectBitCast(const Instruction *I);
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
bool ComputeAddress(const Value *Obj, Address &Addr);
+ bool ComputeCallAddress(const Value *V, Address &Addr);
bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
bool UseUnscaled);
void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
- unsigned Flags, bool UseUnscaled);
+ unsigned Flags, MachineMemOperand *MMO,
+ bool UseUnscaled);
bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
unsigned Alignment);
// Emit functions.
bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
- bool UseUnscaled = false);
+ MachineMemOperand *MMO = nullptr, bool UseUnscaled = false);
bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
- bool UseUnscaled = false);
+ MachineMemOperand *MMO = nullptr, bool UseUnscaled = false);
unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
unsigned Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
unsigned Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
+ unsigned AArch64MaterializeInt(const ConstantInt *CI, MVT VT);
unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
unsigned AArch64MaterializeGV(const GlobalValue *GV);
return 0;
}
+unsigned AArch64FastISel::AArch64MaterializeInt(const ConstantInt *CI, MVT VT) {
+ if (VT > MVT::i64)
+ return 0;
+ return FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+}
+
unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
if (VT != MVT::f32 && VT != MVT::f64)
return 0;
const APFloat Val = CFP->getValueAPF();
- bool is64bit = (VT == MVT::f64);
+ bool Is64Bit = (VT == MVT::f64);
// This checks to see if we can use FMOV instructions to materialize
// a constant, otherwise we have to materialize via the constant pool.
if (TLI.isFPImmLegal(Val, VT)) {
- int Imm;
- unsigned Opc;
- if (is64bit) {
- Imm = AArch64_AM::getFP64Imm(Val);
- Opc = AArch64::FMOVDi;
- } else {
- Imm = AArch64_AM::getFP32Imm(Val);
- Opc = AArch64::FMOVSi;
- }
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ // Positive zero (+0.0) has to be materialized with a fmov from the zero
+ // register, because the immediate version of fmov cannot encode zero.
+ if (Val.isPosZero()) {
+ unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+ unsigned Opc = Is64Bit ? AArch64::FMOVDr : AArch64::FMOVSr;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addReg(ZReg, getKillRegState(true));
+ return ResultReg;
+ }
+ int Imm = Is64Bit ? AArch64_AM::getFP64Imm(Val)
+ : AArch64_AM::getFP32Imm(Val);
+ unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addImm(Imm);
+ .addImm(Imm);
return ResultReg;
}
if (Align == 0)
Align = DL.getTypeAllocSize(CFP->getType());
- unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
- ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE);
+ ADRPReg)
+ .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
- unsigned Opc = is64bit ? AArch64::LDRDui : AArch64::LDRSui;
+ unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(ADRPReg)
- .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ .addReg(ADRPReg)
+ .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
return ResultReg;
}
// ADRP + LDRX
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
ADRPReg)
- .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
+ .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
ResultReg = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
ResultReg)
- .addReg(ADRPReg)
- .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
- AArch64II::MO_NC);
+ .addReg(ADRPReg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
} else {
// ADRP + ADDX
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
- ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
+ ADRPReg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
- .addReg(ADRPReg)
- .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
- .addImm(0);
+ .addReg(ADRPReg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
+ .addImm(0);
}
return ResultReg;
}
return 0;
MVT VT = CEVT.getSimpleVT();
- // FIXME: Handle ConstantInt.
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ if (const auto *CI = dyn_cast<ConstantInt>(C))
+ return AArch64MaterializeInt(CI, VT);
+ else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return AArch64MaterializeFP(CFP, VT);
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return AArch64MaterializeGV(GV);
}
break;
}
+ case Instruction::Add:
+ // Adds of constants are common and easy enough.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
+ return ComputeAddress(U->getOperand(0), Addr);
+ }
+ break;
}
// Try to get this in a register if nothing else has worked.
return Addr.isValid();
}
+bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
+ const User *U = nullptr;
+ unsigned Opcode = Instruction::UserOp1;
+ bool InMBB = true;
+
+ if (const auto *I = dyn_cast<Instruction>(V)) {
+ Opcode = I->getOpcode();
+ U = I;
+ InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
+ } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case Instruction::BitCast:
+ // Look past bitcasts if its operand is in the same BB.
+ if (InMBB)
+ return ComputeCallAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs if its operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return ComputeCallAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints if its operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return ComputeCallAddress(U->getOperand(0), Addr);
+ break;
+ }
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ Addr.setGlobalValue(GV);
+ return true;
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!Addr.getGlobalValue()) {
+ Addr.setReg(getRegForValue(V));
+ return Addr.getReg() != 0;
+ }
+
+ return false;
+}
+
+
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT evt = TLI.getValueType(Ty, true);
void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
const MachineInstrBuilder &MIB,
- unsigned Flags, bool UseUnscaled) {
+ unsigned Flags,
+ MachineMemOperand *MMO,
+ bool UseUnscaled) {
int64_t Offset = Addr.getOffset();
// Frame base works a bit differently. Handle it separately.
if (Addr.getKind() == Address::FrameIndexBase) {
int FI = Addr.getFI();
// FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
// and alignment should be based on the VT.
- MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI, Offset), Flags,
- MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(FI, Offset), Flags,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
// Now add the rest of the operands.
- MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
+ MIB.addFrameIndex(FI).addImm(Offset);
} else {
// Now add the rest of the operands.
MIB.addReg(Addr.getReg());
MIB.addImm(Offset);
}
+
+ if (MMO)
+ MIB.addMemOperand(MMO);
}
bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
- bool UseUnscaled) {
+ MachineMemOperand *MMO, bool UseUnscaled) {
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
if (!UseUnscaled && Addr.getOffset() < 0)
int64_t Offset = Addr.getOffset();
if (Offset & (ScaleFactor - 1))
// Retry using an unscaled, 9-bit, signed immediate offset.
- return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true);
+ return EmitLoad(VT, ResultReg, Addr, MMO, /*UseUnscaled*/ true);
Addr.setOffset(Offset / ScaleFactor);
}
ResultReg = createResultReg(RC);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg);
- AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);
+ AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, MMO, UseUnscaled);
// Loading an i1 requires special handling.
if (VTIsi1) {
return false;
unsigned ResultReg;
- if (!EmitLoad(VT, ResultReg, Addr))
+ if (!EmitLoad(VT, ResultReg, Addr, createMachineMemOperandFor(I)))
return false;
UpdateValueMap(I, ResultReg);
}
bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
- bool UseUnscaled) {
+ MachineMemOperand *MMO, bool UseUnscaled) {
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
if (!UseUnscaled && Addr.getOffset() < 0)
int64_t Offset = Addr.getOffset();
if (Offset & (ScaleFactor - 1))
// Retry using an unscaled, 9-bit, signed immediate offset.
- return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true);
+ return EmitStore(VT, SrcReg, Addr, MMO, /*UseUnscaled*/ true);
Addr.setOffset(Offset / ScaleFactor);
}
// Create the base instruction, then add the operands.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(StrOpc)).addReg(SrcReg);
- AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
+ AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, MMO, UseUnscaled);
+
return true;
}
if (!ComputeAddress(I->getOperand(1), Addr))
return false;
- if (!EmitStore(VT, SrcReg, Addr))
+ if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
return false;
return true;
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
- FuncInfo.MBB->addSuccessor(TBB);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
FastEmitBranch(FBB, DbgLoc);
return true;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
- FuncInfo.MBB->addSuccessor(TBB);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+
FastEmitBranch(FBB, DbgLoc);
return true;
}
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
.addMBB(Target);
- FuncInfo.MBB->addSuccessor(Target);
+
+ // Obtain the branch weight and add the target to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ Target->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(Target, BranchWeight);
return true;
} else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
// Fake request the condition, otherwise the intrinsic might be completely
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
- FuncInfo.MBB->addSuccessor(TBB);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
FastEmitBranch(FBB, DbgLoc);
return true;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
- FuncInfo.MBB->addSuccessor(TBB);
+
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+
FastEmitBranch(FBB, DbgLoc);
return true;
}
DestVT != MVT::f64)
return false;
- unsigned CondReg = getRegForValue(SI->getCondition());
- if (CondReg == 0)
- return false;
- unsigned TrueReg = getRegForValue(SI->getTrueValue());
- if (TrueReg == 0)
- return false;
- unsigned FalseReg = getRegForValue(SI->getFalseValue());
- if (FalseReg == 0)
- return false;
+ unsigned SelectOpc;
+ switch (DestVT.SimpleTy) {
+ default: return false;
+ case MVT::i32: SelectOpc = AArch64::CSELWr; break;
+ case MVT::i64: SelectOpc = AArch64::CSELXr; break;
+ case MVT::f32: SelectOpc = AArch64::FCSELSrrr; break;
+ case MVT::f64: SelectOpc = AArch64::FCSELDrrr; break;
+ }
+ const Value *Cond = SI->getCondition();
+ bool NeedTest = true;
+ AArch64CC::CondCode CC = AArch64CC::NE;
+ if (foldXALUIntrinsic(CC, I, Cond))
+ NeedTest = false;
- MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
- unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
- ANDReg)
- .addReg(CondReg)
+ unsigned CondReg = getRegForValue(Cond);
+ if (!CondReg)
+ return false;
+ bool CondIsKill = hasTrivialKill(Cond);
+
+ if (NeedTest) {
+ MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
+ unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
+ ANDReg)
+ .addReg(CondReg, getKillRegState(CondIsKill))
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
.addReg(ANDReg)
.addReg(ANDReg)
.addImm(0)
.addImm(0);
+ }
- unsigned SelectOpc;
- switch (DestVT.SimpleTy) {
- default:
+ unsigned TrueReg = getRegForValue(SI->getTrueValue());
+ bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
+
+ unsigned FalseReg = getRegForValue(SI->getFalseValue());
+ bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
+
+ if (!TrueReg || !FalseReg)
return false;
- case MVT::i32:
- SelectOpc = AArch64::CSELWr;
- break;
- case MVT::i64:
- SelectOpc = AArch64::CSELXr;
- break;
- case MVT::f32:
- SelectOpc = AArch64::FCSELSrrr;
- break;
- case MVT::f64:
- SelectOpc = AArch64::FCSELDrrr;
- break;
- }
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
ResultReg)
- .addReg(TrueReg)
- .addReg(FalseReg)
- .addImm(AArch64CC::NE);
+ .addReg(TrueReg, getKillRegState(TrueIsKill))
+ .addReg(FalseReg, getKillRegState(FalseIsKill))
+ .addImm(CC);
UpdateValueMap(I, ResultReg);
return true;
return true;
}
+bool AArch64FastISel::FastLowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const Function *F = FuncInfo.Fn;
+ if (F->isVarArg())
+ return false;
+
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC != CallingConv::C)
+ return false;
+
+ // Only handle simple cases like i1/i8/i16/i32/i64/f32/f64 of up to 8 GPR and
+ // FPR each.
+ unsigned GPRCnt = 0;
+ unsigned FPRCnt = 0;
+ unsigned Idx = 0;
+ for (auto const &Arg : F->args()) {
+ // The first argument is at index 1.
+ ++Idx;
+ if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::Nest))
+ return false;
+
+ Type *ArgTy = Arg.getType();
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+ return false;
+
+ EVT ArgVT = TLI.getValueType(ArgTy);
+ if (!ArgVT.isSimple()) return false;
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ ++GPRCnt;
+ break;
+ case MVT::f16:
+ case MVT::f32:
+ case MVT::f64:
+ ++FPRCnt;
+ break;
+ }
+
+ if (GPRCnt > 8 || FPRCnt > 8)
+ return false;
+ }
+
+ static const MCPhysReg Registers[5][8] = {
+ { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
+ AArch64::W5, AArch64::W6, AArch64::W7 },
+ { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
+ AArch64::X5, AArch64::X6, AArch64::X7 },
+ { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
+ AArch64::H5, AArch64::H6, AArch64::H7 },
+ { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
+ AArch64::S5, AArch64::S6, AArch64::S7 },
+ { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
+ AArch64::D5, AArch64::D6, AArch64::D7 }
+ };
+
+ unsigned GPRIdx = 0;
+ unsigned FPRIdx = 0;
+ for (auto const &Arg : F->args()) {
+ MVT VT = TLI.getSimpleValueType(Arg.getType());
+ unsigned SrcReg;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type.");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16: VT = MVT::i32; // fall-through
+ case MVT::i32: SrcReg = Registers[0][GPRIdx++]; break;
+ case MVT::i64: SrcReg = Registers[1][GPRIdx++]; break;
+ case MVT::f16: SrcReg = Registers[2][FPRIdx++]; break;
+ case MVT::f32: SrcReg = Registers[3][FPRIdx++]; break;
+ case MVT::f64: SrcReg = Registers[4][FPRIdx++]; break;
+ }
+
+ // Skip unused arguments.
+ if (Arg.use_empty()) {
+ UpdateValueMap(&Arg, 0);
+ continue;
+ }
+
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
+ // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+ // Without this, EmitLiveInCopies may eliminate the livein if its only
+ // use is a bitcast (which isn't turned into an instruction).
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(DstReg, getKillRegState(true));
+ UpdateValueMap(&Arg, ResultReg);
+ }
+ return true;
+}
+
bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
SmallVectorImpl<MVT> &OutVTs,
unsigned &NumBytes) {
CallingConv::ID CC = CLI.CallConv;
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
// Get a count of how many bytes are to be pushed on the stack.
} else {
assert(VA.isMemLoc() && "Assuming store on stack.");
+ // Don't emit stores for undef values.
+ if (isa<UndefValue>(ArgVal))
+ continue;
+
// Need to store on the stack.
unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
Addr.setReg(AArch64::SP);
Addr.setOffset(VA.getLocMemOffset() + BEAlign);
- if (!EmitStore(ArgVT, ArgReg, Addr))
+ unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getStack(Addr.getOffset()),
+ MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
+
+ if (!EmitStore(ArgVT, ArgReg, Addr, MMO))
return false;
}
}
// Now the return value.
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
// Only handle a single return value.
bool AArch64FastISel::FastLowerCall(CallLoweringInfo &CLI) {
CallingConv::ID CC = CLI.CallConv;
+ bool IsTailCall = CLI.IsTailCall;
bool IsVarArg = CLI.IsVarArg;
const Value *Callee = CLI.Callee;
const char *SymName = CLI.SymName;
- // Only handle global variable Callees.
- const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
- if (!GV)
+ // Allow SelectionDAG isel to handle tail calls.
+ if (IsTailCall)
+ return false;
+
+ CodeModel::Model CM = TM.getCodeModel();
+ // Only support the small and large code model.
+ if (CM != CodeModel::Small && CM != CodeModel::Large)
+ return false;
+
+ // FIXME: Add large code model support for ELF.
+ if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
return false;
// Let SDISel handle vararg functions.
OutVTs.push_back(VT);
}
+ Address Addr;
+ if (!ComputeCallAddress(Callee, Addr))
+ return false;
+
// Handle the arguments now that we've gotten them.
unsigned NumBytes;
if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
// Issue the call.
MachineInstrBuilder MIB;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BL));
- CLI.Call = MIB;
- if (!SymName)
- MIB.addGlobalAddress(GV, 0, 0);
- else
- MIB.addExternalSymbol(SymName, 0);
+ if (CM == CodeModel::Small) {
+ unsigned CallOpc = Addr.getReg() ? AArch64::BLR : AArch64::BL;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
+ if (SymName)
+ MIB.addExternalSymbol(SymName, 0);
+ else if (Addr.getGlobalValue())
+ MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
+ else if (Addr.getReg())
+ MIB.addReg(Addr.getReg());
+ else
+ return false;
+ } else {
+ unsigned CallReg = 0;
+ if (SymName) {
+ unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
+ ADRPReg)
+ .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
+
+ CallReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
+ CallReg)
+ .addReg(ADRPReg)
+ .addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ } else if (Addr.getGlobalValue()) {
+ CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
+ } else if (Addr.getReg())
+ CallReg = Addr.getReg();
+
+ if (!CallReg)
+ return false;
+
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::BLR)).addReg(CallReg);
+ }
// Add implicit physical register uses to the call.
for (auto Reg : CLI.OutRegs)
// Proper defs for return values will be added by setPhysRegsDeadExcept().
MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ CLI.Call = MIB;
+
// Finish off the call including any return values.
return FinishCall(CLI, RetVT, NumBytes);
}
MFI->setFrameAddressIsTaken(true);
const AArch64RegisterInfo *RegInfo =
- static_cast<const AArch64RegisterInfo *>(TM.getRegisterInfo());
+ static_cast<const AArch64RegisterInfo *>(
+ TM.getSubtargetImpl()->getRegisterInfo());
unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
unsigned SrcReg = FramePtr;
.addImm(1);
return true;
}
+ case Intrinsic::sqrt: {
+ Type *RetTy = II->getCalledFunction()->getReturnType();
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ unsigned Op0Reg = getRegForValue(II->getOperand(0));
+ if (!Op0Reg)
+ return false;
+ bool Op0IsKill = hasTrivialKill(II->getOperand(0));
+
+ unsigned ResultReg = FastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(II, ResultReg);
+ return true;
+ }
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
return false;
bool LHSIsKill = hasTrivialKill(LHS);
- unsigned RHSReg = 0;
- bool RHSIsKill = false;
- bool UseImm = true;
- if (!isa<ConstantInt>(RHS)) {
- RHSReg = getRegForValue(RHS);
- if (!RHSReg)
- return false;
- RHSIsKill = hasTrivialKill(RHS);
- UseImm = false;
+ // Check if the immediate can be encoded in the instruction and if we should
+ // invert the instruction (adds -> subs) to handle negative immediates.
+ bool UseImm = false;
+ bool UseInverse = false;
+ uint64_t Imm = 0;
+ if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
+ if (C->isNegative()) {
+ UseInverse = true;
+ Imm = -(C->getSExtValue());
+ } else
+ Imm = C->getZExtValue();
+
+ if (isUInt<12>(Imm))
+ UseImm = true;
+
+ UseInverse = UseImm && UseInverse;
}
+ static const unsigned OpcTable[2][2][2] = {
+ { {AArch64::ADDSWrr, AArch64::ADDSXrr},
+ {AArch64::ADDSWri, AArch64::ADDSXri} },
+ { {AArch64::SUBSWrr, AArch64::SUBSXrr},
+ {AArch64::SUBSWri, AArch64::SUBSXri} }
+ };
unsigned Opc = 0;
unsigned MulReg = 0;
+ unsigned RHSReg = 0;
+ bool RHSIsKill = false;
AArch64CC::CondCode CC = AArch64CC::Invalid;
bool Is64Bit = VT == MVT::i64;
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
- if (UseImm)
- Opc = Is64Bit ? AArch64::ADDSXri : AArch64::ADDSWri;
- else
- Opc = Is64Bit ? AArch64::ADDSXrr : AArch64::ADDSWrr;
- CC = AArch64CC::VS;
- break;
+ Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
case Intrinsic::uadd_with_overflow:
- if (UseImm)
- Opc = Is64Bit ? AArch64::ADDSXri : AArch64::ADDSWri;
- else
- Opc = Is64Bit ? AArch64::ADDSXrr : AArch64::ADDSWrr;
- CC = AArch64CC::HS;
- break;
+ Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::HS; break;
case Intrinsic::ssub_with_overflow:
- if (UseImm)
- Opc = Is64Bit ? AArch64::SUBSXri : AArch64::SUBSWri;
- else
- Opc = Is64Bit ? AArch64::SUBSXrr : AArch64::SUBSWrr;
- CC = AArch64CC::VS;
- break;
+ Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
case Intrinsic::usub_with_overflow:
- if (UseImm)
- Opc = Is64Bit ? AArch64::SUBSXri : AArch64::SUBSWri;
- else
- Opc = Is64Bit ? AArch64::SUBSXrr : AArch64::SUBSWrr;
- CC = AArch64CC::LO;
- break;
+ Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::LO; break;
case Intrinsic::smul_with_overflow: {
CC = AArch64CC::NE;
- if (UseImm) {
- RHSReg = getRegForValue(RHS);
- if (!RHSReg)
- return false;
- RHSIsKill = hasTrivialKill(RHS);
- }
+ RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return false;
+ RHSIsKill = hasTrivialKill(RHS);
+
if (VT == MVT::i32) {
MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
}
case Intrinsic::umul_with_overflow: {
CC = AArch64CC::NE;
- if (UseImm) {
- RHSReg = getRegForValue(RHS);
- if (!RHSReg)
- return false;
- RHSIsKill = hasTrivialKill(RHS);
- }
+ RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return false;
+ RHSIsKill = hasTrivialKill(RHS);
+
if (VT == MVT::i32) {
MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64));
}
}
+ if (!UseImm) {
+ RHSReg = getRegForValue(RHS);
+ if (!RHSReg)
+ return false;
+ RHSIsKill = hasTrivialKill(RHS);
+ }
+
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
if (Opc) {
MachineInstrBuilder MIB;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
ResultReg)
.addReg(LHSReg, getKillRegState(LHSIsKill));
- if (UseImm)
- MIB.addImm(cast<ConstantInt>(RHS)->getZExtValue());
- else
+ if (UseImm) {
+ MIB.addImm(Imm);
+ MIB.addImm(0);
+ } else
MIB.addReg(RHSReg, getKillRegState(RHSIsKill));
}
else
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
- I->getContext());
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
: RetCC_AArch64_AAPCS;
CCInfo.AnalyzeReturn(Outs, RetCC);
switch (RetVT.SimpleTy) {
default: return 0;
case MVT::i8:
+ Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 7 - Shift; break;
case MVT::i16:
+ Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 15 - Shift; break;
case MVT::i32:
- RetVT = MVT::i32;
Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 31 - Shift; break;
case MVT::i64:
Opc = AArch64::UBFMXri; ImmR = -Shift % 64; ImmS = 63 - Shift; break;
}
+ RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, ImmR,
ImmS);
}
unsigned Opc, ImmS;
switch (RetVT.SimpleTy) {
default: return 0;
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- RetVT = MVT::i32;
- Opc = AArch64::UBFMWri; ImmS = 31; break;
- case MVT::i64:
- Opc = AArch64::UBFMXri; ImmS = 63; break;
+ case MVT::i8: Opc = AArch64::UBFMWri; ImmS = 7; break;
+ case MVT::i16: Opc = AArch64::UBFMWri; ImmS = 15; break;
+ case MVT::i32: Opc = AArch64::UBFMWri; ImmS = 31; break;
+ case MVT::i64: Opc = AArch64::UBFMXri; ImmS = 63; break;
}
+ RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
ImmS);
}
unsigned Opc, ImmS;
switch (RetVT.SimpleTy) {
default: return 0;
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- RetVT = MVT::i32;
- Opc = AArch64::SBFMWri; ImmS = 31; break;
- case MVT::i64:
- Opc = AArch64::SBFMXri; ImmS = 63; break;
+ case MVT::i8: Opc = AArch64::SBFMWri; ImmS = 7; break;
+ case MVT::i16: Opc = AArch64::SBFMWri; ImmS = 15; break;
+ case MVT::i32: Opc = AArch64::SBFMWri; ImmS = 31; break;
+ case MVT::i64: Opc = AArch64::SBFMXri; ImmS = 63; break;
}
+ RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
ImmS);
}
MVT SrcVT = SrcEVT.getSimpleVT();
MVT DestVT = DestEVT.getSimpleVT();
- unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
- if (ResultReg == 0)
+ unsigned ResultReg = 0;
+
+ // Check if it is an argument and if it is already zero/sign-extended.
+ if (const auto *Arg = dyn_cast<Argument>(Src)) {
+ if ((isZExt && Arg->hasZExtAttr()) || (!isZExt && Arg->hasSExtAttr())) {
+ if (DestVT == MVT::i64) {
+ ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), ResultReg)
+ .addImm(0)
+ .addReg(SrcReg)
+ .addImm(AArch64::sub_32);
+ } else
+ ResultReg = SrcReg;
+ }
+ }
+
+ if (!ResultReg)
+ ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
+
+ if (!ResultReg)
return false;
+
UpdateValueMap(I, ResultReg);
return true;
}
return true;
}
+bool AArch64FastISel::SelectBitCast(const Instruction *I) {
+ MVT RetVT, SrcVT;
+
+ if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
+ return false;
+ if (!isTypeLegal(I->getType(), RetVT))
+ return false;
+
+ unsigned Opc;
+ if (RetVT == MVT::f32 && SrcVT == MVT::i32)
+ Opc = AArch64::FMOVWSr;
+ else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
+ Opc = AArch64::FMOVXDr;
+ else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
+ Opc = AArch64::FMOVSWr;
+ else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
+ Opc = AArch64::FMOVDXr;
+ else
+ return false;
+
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (!Op0Reg)
+ return false;
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+ unsigned ResultReg = FastEmitInst_r(Opc, TLI.getRegClassFor(RetVT),
+ Op0Reg, Op0IsKill);
+
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default:
return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/false);
case Instruction::AShr:
return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/true);
+ case Instruction::BitCast:
+ return SelectBitCast(I);
}
return false;
// Silence warnings.