bool SelectTrunc(const Instruction *I);
bool SelectIntExt(const Instruction *I);
bool SelectMul(const Instruction *I);
- bool SelectShift(const Instruction *I, bool IsLeftShift, bool IsArithmetic);
+ bool SelectShift(const Instruction *I);
bool SelectBitCast(const Instruction *I);
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
+ bool isValueAvailable(const Value *V) const;
bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
bool ComputeCallAddress(const Value *V, Address &Addr);
bool SimplifyAddress(Address &Addr, MVT VT);
unsigned emitAddsSubs_ri(bool UseAdds, MVT RetVT, unsigned LHSReg,
bool LHSIsKill, uint64_t Imm,
bool WantResult = true);
+ unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ShiftType,
+ uint64_t ShiftImm, bool WantResult = true);
unsigned emitAddsSubs_rs(bool UseAdds, MVT RetVT, unsigned LHSReg,
bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
AArch64_AM::ShiftExtendType ShiftType,
uint64_t ShiftImm, bool WantResult = true);
+ unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
+ bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ExtType,
+ uint64_t ShiftImm, bool WantResult = true);
+
unsigned emitAddsSubs_rx(bool UseAdds, MVT RetVT, unsigned LHSReg,
bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
AArch64_AM::ShiftExtendType ExtType,
unsigned Op1, bool Op1IsKill);
unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill);
- unsigned Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
- unsigned Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
- unsigned Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
+ unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill);
+ unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
+ uint64_t Imm, bool IsZExt = true);
+ unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill);
+ unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
+ uint64_t Imm, bool IsZExt = true);
+ unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill);
+ unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
+ uint64_t Imm, bool IsZExt = false);
unsigned AArch64MaterializeInt(const ConstantInt *CI, MVT VT);
unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
// Backend specific FastISel code.
unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
unsigned TargetMaterializeConstant(const Constant *C) override;
+ unsigned TargetMaterializeFloatZero(const ConstantFP *CF) override;
explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
}
unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
+ // Positive zero (+0.0) has to be materialized with a fmov from the zero
+ // register, because the immediate version of fmov cannot encode zero.
+ if (CFP->isNullValue())
+ return TargetMaterializeFloatZero(CFP);
+
if (VT != MVT::f32 && VT != MVT::f64)
return 0;
const APFloat Val = CFP->getValueAPF();
bool Is64Bit = (VT == MVT::f64);
-
// This checks to see if we can use FMOV instructions to materialize
// a constant, otherwise we have to materialize via the constant pool.
if (TLI.isFPImmLegal(Val, VT)) {
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
- // Positive zero (+0.0) has to be materialized with a fmov from the zero
- // register, because the immediate version of fmov cannot encode zero.
- if (Val.isPosZero()) {
- unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
- unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(ZReg, getKillRegState(true));
- return ResultReg;
- }
- int Imm = Is64Bit ? AArch64_AM::getFP64Imm(Val)
- : AArch64_AM::getFP32Imm(Val);
+ int Imm =
+ Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
+ assert((Imm != -1) && "Cannot encode floating-point constant.");
unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addImm(Imm);
- return ResultReg;
+ return FastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
}
// Materialize via constant pool. MachineConstantPool wants an explicit
unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
- ADRPReg)
- .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
+ ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(ADRPReg)
- .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ .addReg(ADRPReg)
+ .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
return ResultReg;
}
return 0;
}
+unsigned AArch64FastISel::TargetMaterializeFloatZero(const ConstantFP *CFP) {
+ assert(CFP->isNullValue() &&
+ "Floating-point constant is not a positive zero.");
+ MVT VT;
+ if (!isTypeLegal(CFP->getType(), VT))
+ return 0;
+
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return 0;
+
+ bool Is64Bit = (VT == MVT::f64);
+ unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+ unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
+ return FastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
+}
+
// Computes the address to get to an object.
bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
return false;
}
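+// Return true if the value can be folded into the instruction currently being
+// selected. Non-instruction values (constants, arguments, globals) are always
+// available; an instruction is only available if it is defined in the same
+// basic block, since FastISel selects one basic block at a time.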
+bool AArch64FastISel::isValueAvailable(const Value *V) const {
+ if (!isa<Instruction>(V))
+ return true;
+
+ const auto *I = cast<Instruction>(V);
+ if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
+ return true;
+
+ return false;
+}
+
bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
unsigned ScaleFactor;
switch (VT.SimpleTy) {
Addr.getOffsetReg())
RegisterOffsetNeedsLowering = true;
+ // Cannot encode zero register as base.
+ if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
+ RegisterOffsetNeedsLowering = true;
+
// If this is a stack pointer and the offset needs to be simplified then put
// the alloca address into a register, set the base type back to register and
// continue. This should almost never happen.
if (RegisterOffsetNeedsLowering) {
unsigned ResultReg = 0;
- if (Addr.getReg())
- ResultReg = FastEmitInst_rri(AArch64::ADDXrs, &AArch64::GPR64RegClass,
- Addr.getReg(), /*TODO:IsKill=*/false,
- Addr.getOffsetReg(), /*TODO:IsKill=*/false,
- Addr.getShift());
- else
- ResultReg = Emit_LSL_ri(MVT::i64, Addr.getOffsetReg(),
- /*Op0IsKill=*/false, Addr.getShift());
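+ // Fold the extend or shift of the offset register into the address
+ // computation: use an extended-register add when there is a base register
+ // and the offset is sign-/zero-extended, a shifted-register add when it is
+ // only shifted, and fold the extension into a plain shift when there is no
+ // base register.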
+ if (Addr.getReg()) {
+ if (Addr.getExtendType() == AArch64_AM::SXTW ||
+ Addr.getExtendType() == AArch64_AM::UXTW)
+ ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
+ /*TODO:IsKill=*/false, Addr.getOffsetReg(),
+ /*TODO:IsKill=*/false, Addr.getExtendType(),
+ Addr.getShift());
+ else
+ ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
+ /*TODO:IsKill=*/false, Addr.getOffsetReg(),
+ /*TODO:IsKill=*/false, AArch64_AM::LSL,
+ Addr.getShift());
+ } else {
+ if (Addr.getExtendType() == AArch64_AM::UXTW)
+ ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
+ /*Op0IsKill=*/false, Addr.getShift(),
+ /*IsZExt=*/true);
+ else if (Addr.getExtendType() == AArch64_AM::SXTW)
+ ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
+ /*Op0IsKill=*/false, Addr.getShift(),
+ /*IsZExt=*/false);
+ else
+ ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
+ /*Op0IsKill=*/false, Addr.getShift());
+ }
if (!ResultReg)
return false;
Addr.setReg(ResultReg);
Addr.setOffsetReg(0);
Addr.setShift(0);
+ Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
}
// Since the offset is too large for the load/store instruction get the
std::swap(LHS, RHS);
// Canonicalize shift immediate to the RHS.
- if (UseAdds)
+ if (UseAdds && isValueAvailable(LHS))
if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
if (isa<ConstantInt>(SI->getOperand(1)))
if (SI->getOpcode() == Instruction::Shl ||
return ResultReg;
// Only extend the RHS within the instruction if there is a valid extend type.
- if (ExtendType != AArch64_AM::InvalidShiftExtend) {
+ if (ExtendType != AArch64_AM::InvalidShiftExtend && isValueAvailable(RHS)) {
if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
}
// Check if the shift can be folded into the instruction.
- if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
- if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
- AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
- switch (SI->getOpcode()) {
- default: break;
- case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
- case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
- case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
- }
- uint64_t ShiftVal = C->getZExtValue();
- if (ShiftType != AArch64_AM::InvalidShiftExtend) {
- unsigned RHSReg = getRegForValue(SI->getOperand(0));
- if (!RHSReg)
- return 0;
- bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
- return emitAddsSubs_rs(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg,
- RHSIsKill, ShiftType, ShiftVal, WantResult);
+ if (isValueAvailable(RHS))
+ if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
+ if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
+ AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
+ switch (SI->getOpcode()) {
+ default: break;
+ case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
+ case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
+ case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
+ }
+ uint64_t ShiftVal = C->getZExtValue();
+ if (ShiftType != AArch64_AM::InvalidShiftExtend) {
+ unsigned RHSReg = getRegForValue(SI->getOperand(0));
+ if (!RHSReg)
+ return 0;
+ bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
+ return emitAddsSubs_rs(UseAdds, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ShiftType, ShiftVal, WantResult);
+ }
}
}
- }
unsigned RHSReg = getRegForValue(RHS);
if (!RHSReg)
return ResultReg;
}
+unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT,
+ unsigned LHSReg, bool LHSIsKill,
+ unsigned RHSReg, bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ShiftType,
+ uint64_t ShiftImm, bool WantResult) {
+ assert(LHSReg && RHSReg && "Invalid register number.");
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return 0;
+
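+ // Opcode table indexed by [!UseAdd][Is64Bit]: ADD variants in the first
+ // row, SUB variants in the second; 32-bit opcodes in the first column,
+ // 64-bit in the second.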
+ static const unsigned OpcTable[2][2] = {
+ { AArch64::ADDWrs, AArch64::ADDXrs },
+ { AArch64::SUBWrs, AArch64::SUBXrs }
+ };
+ unsigned Opc = OpcTable[!UseAdd][(RetVT == MVT::i64)];
+ unsigned ResultReg;
+ if (WantResult) {
+ const TargetRegisterClass *RC =
+ (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ ResultReg = createResultReg(RC);
+ } else
+ ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+
+ const MCInstrDesc &II = TII.get(Opc);
+ LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+ RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(LHSReg, getKillRegState(LHSIsKill))
+ .addReg(RHSReg, getKillRegState(RHSIsKill))
+ .addImm(getShifterImm(ShiftType, ShiftImm));
+
+ return ResultReg;
+}
+
unsigned AArch64FastISel::emitAddsSubs_rs(bool UseAdds, MVT RetVT,
unsigned LHSReg, bool LHSIsKill,
unsigned RHSReg, bool RHSIsKill,
return ResultReg;
}
+unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT,
+ unsigned LHSReg, bool LHSIsKill,
+ unsigned RHSReg, bool RHSIsKill,
+ AArch64_AM::ShiftExtendType ExtType,
+ uint64_t ShiftImm, bool WantResult) {
+ assert(LHSReg && RHSReg && "Invalid register number.");
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return 0;
+
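+ // Same layout as the shifted-register table above, but for the
+ // extended-register ADD/SUB variants; the extend type and left-shift amount
+ // are encoded together by getArithExtendImm below.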
+ static const unsigned OpcTable[2][2] = {
+ { AArch64::ADDWrx, AArch64::ADDXrx },
+ { AArch64::SUBWrx, AArch64::SUBXrx }
+ };
+ unsigned Opc = OpcTable[!UseAdd][(RetVT == MVT::i64)];
+ unsigned ResultReg;
+ if (WantResult) {
+ const TargetRegisterClass *RC =
+ (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ ResultReg = createResultReg(RC);
+ } else
+ ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+
+ const MCInstrDesc &II = TII.get(Opc);
+ LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+ RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(LHSReg, getKillRegState(LHSIsKill))
+ .addReg(RHSReg, getKillRegState(RHSIsKill))
+ .addImm(getArithExtendImm(ExtType, ShiftImm));
+
+ return ResultReg;
+}
+
unsigned AArch64FastISel::emitAddsSubs_rx(bool UseAdds, MVT RetVT,
unsigned LHSReg, bool LHSIsKill,
unsigned RHSReg, bool RHSIsKill,
}
// Storing an i1 requires special handling.
- if (VTIsi1) {
+ if (VTIsi1 && SrcReg != AArch64::WZR) {
unsigned ANDReg = emitAND_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
assert(ANDReg && "Unexpected AND instruction emission failure.");
SrcReg = ANDReg;
bool AArch64FastISel::SelectStore(const Instruction *I) {
MVT VT;
- Value *Op0 = I->getOperand(0);
+ const Value *Op0 = I->getOperand(0);
// Verify we have a legal type before going any further. Currently, we handle
// simple types that will directly fit in a register (i32/f32/i64/f64) or
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
cast<StoreInst>(I)->isAtomic())
return false;
- // Get the value to be stored into a register.
- unsigned SrcReg = getRegForValue(Op0);
- if (SrcReg == 0)
+ // Get the value to be stored into a register. Use the zero register directly
+ // when possible to avoid an unnecessary copy and a wasted register.
+ unsigned SrcReg = 0;
+ if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
+ if (CI->isZero())
+ SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+ } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
+ if (CF->isZero() && !CF->isNegative()) {
+ VT = MVT::getIntegerVT(VT.getSizeInBits());
+ SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+ }
+ }
+
+ if (!SrcReg)
+ SrcReg = getRegForValue(Op0);
+
+ if (!SrcReg)
return false;
// See if we can handle this address.
if (VT == MVT::i32) {
MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
- unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
+ unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
+ /*IsKill=*/false, 32);
MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
AArch64::sub_32);
ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
bool SrcIsKill = hasTrivialKill(Op);
// If we're truncating from i64 to a smaller non-legal type then generate an
- // AND. Otherwise, we know the high bits are undefined and a truncate doesn't
- // generate any code.
+ // AND. Otherwise, we know the high bits are undefined and a truncate only
+ // generates a COPY. We cannot simply reuse the source register as the
+ // result register, because doing so could incorrectly transfer the kill
+ // flag onto the source register.
+ unsigned ResultReg;
if (SrcVT == MVT::i64) {
uint64_t Mask = 0;
switch (DestVT.SimpleTy) {
unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
AArch64::sub_32);
// Create the AND instruction which performs the actual truncation.
- unsigned ANDReg = emitAND_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
- assert(ANDReg && "Unexpected AND instruction emission failure.");
- SrcReg = ANDReg;
+ ResultReg = emitAND_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
+ assert(ResultReg && "Unexpected AND instruction emission failure.");
+ } else {
+ ResultReg = createResultReg(&AArch64::GPR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
}
- UpdateValueMap(I, SrcReg);
+ UpdateValueMap(I, ResultReg);
return true;
}
AArch64::XZR, /*IsKill=*/true);
}
-unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
- uint64_t Shift) {
- unsigned Opc, ImmR, ImmS;
+unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill) {
+ unsigned Opc = 0;
+ bool NeedTrunc = false;
+ uint64_t Mask = 0;
switch (RetVT.SimpleTy) {
default: return 0;
- case MVT::i8:
- Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 7 - Shift; break;
- case MVT::i16:
- Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 15 - Shift; break;
- case MVT::i32:
- Opc = AArch64::UBFMWri; ImmR = -Shift % 32; ImmS = 31 - Shift; break;
- case MVT::i64:
- Opc = AArch64::UBFMXri; ImmR = -Shift % 64; ImmS = 63 - Shift; break;
+ case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
+ case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
+ case MVT::i32: Opc = AArch64::LSLVWr; break;
+ case MVT::i64: Opc = AArch64::LSLVXr; break;
}
const TargetRegisterClass *RC =
(RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
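+ // i8 and i16 have no variable-shift instructions of their own, so the
+ // shift is performed on a 32-bit register and the out-of-range bits of the
+ // result are masked off afterwards.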
+ if (NeedTrunc) {
+ Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
+ Op1IsKill = true;
+ }
+ unsigned ResultReg = FastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
+ Op1IsKill);
+ if (NeedTrunc)
+ ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
+ bool Op0IsKill, uint64_t Shift,
+ bool IsZExt) {
+ assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
+ "Unexpected source/return type pair.");
+ assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
+ SrcVT == MVT::i64) && "Unexpected source value type.");
+ assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
+ RetVT == MVT::i64) && "Unexpected return value type.");
+
+ bool Is64Bit = (RetVT == MVT::i64);
+ unsigned RegSize = Is64Bit ? 64 : 32;
+ unsigned DstBits = RetVT.getSizeInBits();
+ unsigned SrcBits = SrcVT.getSizeInBits();
+
+ // Don't deal with undefined shifts.
+ if (Shift >= DstBits)
+ return 0;
+
+ // For immediate shifts we can fold the zero-/sign-extension into the shift.
+ // {S|U}BFM Wd, Wn, #r, #s
+ // Wd<32+s-r,32-r> = Wn<s:0> when r > s
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = shl i16 %1, 4
+ // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
+ // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
+ // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
+ // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = shl i16 %1, 8
+ // Wd<32+7-24,32-24> = Wn<7:0>
+ // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
+ // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
+ // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = shl i16 %1, 12
+ // Wd<32+3-20,32-20> = Wn<3:0>
+ // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
+ // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
+ // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
+
+ unsigned ImmR = RegSize - Shift;
+ // Limit the width to the length of the source type.
+ unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
+ static const unsigned OpcTable[2][2] = {
+ {AArch64::SBFMWri, AArch64::SBFMXri},
+ {AArch64::UBFMWri, AArch64::UBFMXri}
+ };
+ unsigned Opc = OpcTable[IsZExt][Is64Bit];
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
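+ // A 32-bit (or smaller) source feeding a 64-bit bitfield move has to be
+ // placed into the low half of a 64-bit register first.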
+ if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
+ unsigned TmpReg = MRI.createVirtualRegister(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), TmpReg)
+ .addImm(0)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(AArch64::sub_32);
+ Op0 = TmpReg;
+ Op0IsKill = true;
+ }
return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
-unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
- uint64_t Shift) {
- unsigned Opc, ImmS;
+unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill) {
+ unsigned Opc = 0;
+ bool NeedTrunc = false;
+ uint64_t Mask = 0;
switch (RetVT.SimpleTy) {
default: return 0;
- case MVT::i8: Opc = AArch64::UBFMWri; ImmS = 7; break;
- case MVT::i16: Opc = AArch64::UBFMWri; ImmS = 15; break;
- case MVT::i32: Opc = AArch64::UBFMWri; ImmS = 31; break;
- case MVT::i64: Opc = AArch64::UBFMXri; ImmS = 63; break;
+ case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
+ case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
+ case MVT::i32: Opc = AArch64::LSRVWr; break;
+ case MVT::i64: Opc = AArch64::LSRVXr; break;
}
const TargetRegisterClass *RC =
(RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, Shift, ImmS);
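+ // For i8 and i16, zero the bits above the narrow width before the logical
+ // right shift so that no stale high bits are shifted into the result, then
+ // mask the result back down to the narrow width.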
+ if (NeedTrunc) {
+ Op0Reg = emitAND_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
+ Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
+ Op0IsKill = Op1IsKill = true;
+ }
+ unsigned ResultReg = FastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
+ Op1IsKill);
+ if (NeedTrunc)
+ ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ return ResultReg;
}
-unsigned AArch64FastISel::Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
- uint64_t Shift) {
- unsigned Opc, ImmS;
+unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
+ bool Op0IsKill, uint64_t Shift,
+ bool IsZExt) {
+ assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
+ "Unexpected source/return type pair.");
+ assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
+ SrcVT == MVT::i64) && "Unexpected source value type.");
+ assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
+ RetVT == MVT::i64) && "Unexpected return value type.");
+
+ bool Is64Bit = (RetVT == MVT::i64);
+ unsigned RegSize = Is64Bit ? 64 : 32;
+ unsigned DstBits = RetVT.getSizeInBits();
+ unsigned SrcBits = SrcVT.getSizeInBits();
+
+ // Don't deal with undefined shifts.
+ if (Shift >= DstBits)
+ return 0;
+
+ // For immediate shifts we can fold the zero-/sign-extension into the shift.
+ // {S|U}BFM Wd, Wn, #r, #s
+ // Wd<s-r:0> = Wn<s:r> when r <= s
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = lshr i16 %1, 4
+ // Wd<7-4:0> = Wn<7:4>
+ // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
+ // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = lshr i16 %1, 8
+ // Wd<7-7:0> = Wn<7:7>
+ // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = lshr i16 %1, 12
+ // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
+ // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
+
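+ // A zero-extended value shifted right by at least its original bit width is
+ // always zero, so just materialize the constant zero.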
+ if (Shift >= SrcBits && IsZExt)
+ return AArch64MaterializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)),
+ RetVT);
+
+ // It is not possible to fold a sign-extend into the LShr instruction, so
+ // emit the sign-extend explicitly before performing the shift.
+ if (!IsZExt) {
+ Op0 = EmitIntExt(SrcVT, Op0, RetVT, IsZExt);
+ if (!Op0)
+ return 0;
+ Op0IsKill = true;
+ SrcVT = RetVT;
+ SrcBits = SrcVT.getSizeInBits();
+ IsZExt = true;
+ }
+
+ unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
+ unsigned ImmS = SrcBits - 1;
+ static const unsigned OpcTable[2][2] = {
+ {AArch64::SBFMWri, AArch64::SBFMXri},
+ {AArch64::UBFMWri, AArch64::UBFMXri}
+ };
+ unsigned Opc = OpcTable[IsZExt][Is64Bit];
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
+ unsigned TmpReg = MRI.createVirtualRegister(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), TmpReg)
+ .addImm(0)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(AArch64::sub_32);
+ Op0 = TmpReg;
+ Op0IsKill = true;
+ }
+ return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
+}
+
+unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
+ unsigned Op1Reg, bool Op1IsKill) {
+ unsigned Opc = 0;
+ bool NeedTrunc = false;
+ uint64_t Mask = 0;
switch (RetVT.SimpleTy) {
default: return 0;
- case MVT::i8: Opc = AArch64::SBFMWri; ImmS = 7; break;
- case MVT::i16: Opc = AArch64::SBFMWri; ImmS = 15; break;
- case MVT::i32: Opc = AArch64::SBFMWri; ImmS = 31; break;
- case MVT::i64: Opc = AArch64::SBFMXri; ImmS = 63; break;
+ case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
+ case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
+ case MVT::i32: Opc = AArch64::ASRVWr; break;
+ case MVT::i64: Opc = AArch64::ASRVXr; break;
}
const TargetRegisterClass *RC =
(RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, Shift, ImmS);
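+ // For i8 and i16, sign-extend the value into the full 32-bit register first
+ // so the arithmetic shift replicates the correct sign bit, then mask the
+ // result back down to the narrow width.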
+ if (NeedTrunc) {
+ Op0Reg = EmitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
+ Op1Reg = emitAND_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
+ Op0IsKill = Op1IsKill = true;
+ }
+ unsigned ResultReg = FastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
+ Op1IsKill);
+ if (NeedTrunc)
+ ResultReg = emitAND_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
+ return ResultReg;
+}
+
+unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
+ bool Op0IsKill, uint64_t Shift,
+ bool IsZExt) {
+ assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
+ "Unexpected source/return type pair.");
+ assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
+ SrcVT == MVT::i64) && "Unexpected source value type.");
+ assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
+ RetVT == MVT::i64) && "Unexpected return value type.");
+
+ bool Is64Bit = (RetVT == MVT::i64);
+ unsigned RegSize = Is64Bit ? 64 : 32;
+ unsigned DstBits = RetVT.getSizeInBits();
+ unsigned SrcBits = SrcVT.getSizeInBits();
+
+ // Don't deal with undefined shifts.
+ if (Shift >= DstBits)
+ return 0;
+
+ // For immediate shifts we can fold the zero-/sign-extension into the shift.
+ // {S|U}BFM Wd, Wn, #r, #s
+ // Wd<s-r:0> = Wn<s:r> when r <= s
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = ashr i16 %1, 4
+ // Wd<7-4:0> = Wn<7:4>
+ // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
+ // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = ashr i16 %1, 8
+ // Wd<7-7:0> = Wn<7:7>
+ // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
+
+ // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
+ // %2 = ashr i16 %1, 12
+ // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
+ // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
+ // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
+
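+ // As with the logical right shift: a zero-extended value shifted by at
+ // least its original bit width is always zero.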
+ if (Shift >= SrcBits && IsZExt)
+ return AArch64MaterializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)),
+ RetVT);
+
+ unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
+ unsigned ImmS = SrcBits - 1;
+ static const unsigned OpcTable[2][2] = {
+ {AArch64::SBFMWri, AArch64::SBFMXri},
+ {AArch64::UBFMWri, AArch64::UBFMXri}
+ };
+ unsigned Opc = OpcTable[IsZExt][Is64Bit];
+ const TargetRegisterClass *RC =
+ Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+ if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
+ unsigned TmpReg = MRI.createVirtualRegister(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), TmpReg)
+ .addImm(0)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(AArch64::sub_32);
+ Op0 = TmpReg;
+ Op0IsKill = true;
+ }
+ return FastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
}
unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
return true;
}
-bool AArch64FastISel::SelectShift(const Instruction *I, bool IsLeftShift,
- bool IsArithmetic) {
- EVT RetEVT = TLI.getValueType(I->getType(), true);
- if (!RetEVT.isSimple())
+bool AArch64FastISel::SelectShift(const Instruction *I) {
+ MVT RetVT;
+ if (!isLoadStoreTypeLegal(I->getType(), RetVT))
return false;
- MVT RetVT = RetEVT.getSimpleVT();
- if (!isa<ConstantInt>(I->getOperand(1)))
- return false;
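+ // Immediate shifts: try to fold a preceding zero-/sign-extend of the
+ // shifted value into the shift itself by passing the original (narrow)
+ // source type down to the emit helpers.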
+ if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ unsigned ResultReg = 0;
+ uint64_t ShiftVal = C->getZExtValue();
+ MVT SrcVT = RetVT;
+ bool IsZExt = (I->getOpcode() != Instruction::AShr);
+ const Value *Op0 = I->getOperand(0);
+ if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
+ MVT TmpVT;
+ if (isValueAvailable(ZExt) &&
+ isLoadStoreTypeLegal(ZExt->getSrcTy(), TmpVT)) {
+ SrcVT = TmpVT;
+ IsZExt = true;
+ Op0 = ZExt->getOperand(0);
+ }
+ } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
+ MVT TmpVT;
+ if (isValueAvailable(SExt) &&
+ isLoadStoreTypeLegal(SExt->getSrcTy(), TmpVT)) {
+ SrcVT = TmpVT;
+ IsZExt = false;
+ Op0 = SExt->getOperand(0);
+ }
+ }
+
+ unsigned Op0Reg = getRegForValue(Op0);
+ if (!Op0Reg)
+ return false;
+ bool Op0IsKill = hasTrivialKill(Op0);
+
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unexpected instruction.");
+ case Instruction::Shl:
+ ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
+ break;
+ case Instruction::AShr:
+ ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
+ break;
+ case Instruction::LShr:
+ ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
+ break;
+ }
+ if (!ResultReg)
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (!Op0Reg)
return false;
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
- uint64_t ShiftVal = cast<ConstantInt>(I->getOperand(1))->getZExtValue();
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (!Op1Reg)
+ return false;
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
- unsigned ResultReg;
- if (IsLeftShift)
- ResultReg = Emit_LSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
- else {
- if (IsArithmetic)
- ResultReg = Emit_ASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
- else
- ResultReg = Emit_LSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
+ unsigned ResultReg = 0;
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unexpected instruction.");
+ case Instruction::Shl:
+ ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
+ break;
+ case Instruction::AShr:
+ ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
+ break;
+ case Instruction::LShr:
+ ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
+ break;
}
if (!ResultReg)
// selector -> improve FastISel tblgen.
case Instruction::Mul:
return SelectMul(I);
- case Instruction::Shl:
- return SelectShift(I, /*IsLeftShift=*/true, /*IsArithmetic=*/false);
- case Instruction::LShr:
- return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/false);
+ case Instruction::Shl: // fall-through
+ case Instruction::LShr: // fall-through
case Instruction::AShr:
- return SelectShift(I, /*IsLeftShift=*/false, /*IsArithmetic=*/true);
+ return SelectShift(I);
case Instruction::BitCast:
return SelectBitCast(I);
}