X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FAArch64%2FAArch64FastISel.cpp;h=c3f6859f510d6840396ef2564bd0eaf782793323;hb=515cc265c96317bb4275939a90a3d723f10e7a23;hp=1e4499b8d53ae9031f53ce58e44fb062c5946992;hpb=d3a04223e84797a1432f10d7a153da6c258017a9;p=oota-llvm.git

diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 1e4499b8d53..c3f6859f510 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64.h"
+#include "AArch64CallingConvention.h"
 #include "AArch64Subtarget.h"
 #include "AArch64TargetMachine.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
@@ -150,6 +151,8 @@ private:
   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                          const Value *Cond);
   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
+  bool optimizeSelect(const SelectInst *SI);
+  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
 
   // Emit helper routines.
   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
@@ -242,9 +245,10 @@ public:
   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
 
   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
-                         const TargetLibraryInfo *LibInfo)
+                           const TargetLibraryInfo *LibInfo)
       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
-    Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+    Subtarget =
+        &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
     Context = &FuncInfo.Fn->getContext();
   }
 
@@ -299,6 +303,8 @@ static unsigned getImplicitScaleFactor(MVT VT) {
 
 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
   if (CC == CallingConv::WebKit_JS)
     return CC_AArch64_WebKit_JS;
+  if (CC == CallingConv::GHC)
+    return CC_AArch64_GHC;
   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
 }
@@ -364,6 +370,24 @@ unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
   }
 
+  // For the MachO large code model materialize the FP constant in code.
+  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
+    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
+    const TargetRegisterClass *RC = Is64Bit ?
+        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+    unsigned TmpReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
+        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+
+    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg)
+        .addReg(TmpReg, getKillRegState(true));
+
+    return ResultReg;
+  }
+
   // Materialize via constant pool. MachineConstantPool wants an explicit
   // alignment.
   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
@@ -750,7 +774,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
     if (Addr.getOffsetReg())
       break;
 
-    if (DL.getTypeSizeInBits(Ty) != 8)
+    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
       break;
 
     const Value *LHS = U->getOperand(0);
@@ -1084,7 +1108,7 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
     RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
 
   // Canonicalize immediates to the RHS first.
-  if (UseAdd && isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
+  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
     std::swap(LHS, RHS);
 
   // Canonicalize mul by power of 2 to the RHS.
@@ -1118,7 +1142,11 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
       else
         ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm,
                                   SetFlags, WantResult);
-  }
+  } else if (const auto *C = dyn_cast<Constant>(RHS))
+    if (C->isNullValue())
+      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
+                                WantResult);
+
   if (ResultReg)
     return ResultReg;
 
@@ -1889,7 +1917,8 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
   // could select it. Emit a copy to subreg if necessary. FastISel will remove
   // it when it selects the integer extend.
   unsigned Reg = lookUpRegForValue(IntExtVal);
-  if (!Reg) {
+  auto *MI = MRI.getUniqueVRegDef(Reg);
+  if (!MI) {
     if (RetVT == MVT::i64 && VT <= MVT::i32) {
       if (WantZExt) {
         // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
@@ -1907,10 +1936,7 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
   // The integer extend has already been emitted - delete all the instructions
   // that have been emitted by the integer extend lowering code and use the
   // result from the load instruction directly.
-  while (Reg) {
-    auto *MI = MRI.getUniqueVRegDef(Reg);
-    if (!MI)
-      break;
+  while (MI) {
     Reg = 0;
     for (auto &Opnd : MI->uses()) {
       if (Opnd.isReg()) {
@@ -1919,6 +1945,9 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
       }
     }
     MI->eraseFromParent();
+    MI = nullptr;
+    if (Reg)
+      MI = MRI.getUniqueVRegDef(Reg);
   }
   updateValueMap(IntExtVal, ResultReg);
   return true;
@@ -2087,12 +2116,12 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
   const Value *LHS = CI->getOperand(0);
   const Value *RHS = CI->getOperand(1);
 
-  Type *Ty = LHS->getType();
-  if (!Ty->isIntegerTy())
-    return false;
+  MVT VT;
+  if (!isTypeSupported(LHS->getType(), VT))
+    return false;
 
-  unsigned BW = cast<IntegerType>(Ty)->getBitWidth();
-  if (BW != 1 && BW != 8 && BW != 16 && BW != 32 && BW != 64)
+  unsigned BW = VT.getSizeInBits();
+  if (BW > 64)
     return false;
 
   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
@@ -2106,19 +2135,19 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
 
   int TestBit = -1;
   bool IsCmpNE;
-  if ((Predicate == CmpInst::ICMP_EQ) || (Predicate == CmpInst::ICMP_NE)) {
-    if (const auto *C = dyn_cast<ConstantInt>(LHS))
-      if (C->isNullValue())
-        std::swap(LHS, RHS);
-
-    if (!isa<ConstantInt>(RHS))
-      return false;
+  switch (Predicate) {
+  default:
+    return false;
+  case CmpInst::ICMP_EQ:
+  case CmpInst::ICMP_NE:
+    if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
+      std::swap(LHS, RHS);
 
-    if (!cast<ConstantInt>(RHS)->isNullValue())
+    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
       return false;
 
     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
-      if (AI->getOpcode() == Instruction::And) {
+      if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
         const Value *AndLHS = AI->getOperand(0);
         const Value *AndRHS = AI->getOperand(1);
 
@@ -2132,27 +2161,32 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
           LHS = AndLHS;
         }
       }
-    IsCmpNE = Predicate == CmpInst::ICMP_NE;
-  } else if (Predicate == CmpInst::ICMP_SLT) {
-    if (!isa<ConstantInt>(RHS))
-      return false;
 
-    if (!cast<ConstantInt>(RHS)->isNullValue())
+    if (VT == MVT::i1)
+      TestBit = 0;
+
+    IsCmpNE = Predicate == CmpInst::ICMP_NE;
+    break;
+  case CmpInst::ICMP_SLT:
+  case CmpInst::ICMP_SGE:
+    if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
      return false;
 
     TestBit = BW - 1;
-    IsCmpNE = true;
-  } else if (Predicate == CmpInst::ICMP_SGT) {
+    IsCmpNE = Predicate == CmpInst::ICMP_SLT;
+    break;
+  case CmpInst::ICMP_SGT:
+  case CmpInst::ICMP_SLE:
     if (!isa<ConstantInt>(RHS))
       return false;
 
-    if (cast<ConstantInt>(RHS)->getValue() != -1)
+    if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
       return false;
 
     TestBit = BW - 1;
-    IsCmpNE = false;
-  } else
-    return false;
+    IsCmpNE = Predicate == CmpInst::ICMP_SLE;
+    break;
+  } // end switch
 
   static const unsigned OpcTable[2][2][2] = {
     { {AArch64::CBZW,  AArch64::CBZX },
@@ -2165,7 +2199,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
   bool Is64Bit = BW == 64;
   if (TestBit < 32 && TestBit >= 0)
     Is64Bit = false;
-  
+
   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
   const MCInstrDesc &II = TII.get(Opc);
 
@@ -2178,11 +2212,8 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
                                         AArch64::sub_32);
 
-  if ((BW < 32) && !IsBitTest) {
-    EVT CmpEVT = TLI.getValueType(Ty, true);
-    SrcReg =
-        emitIntExt(CmpEVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ true);
-  }
+  if ((BW < 32) && !IsBitTest)
+    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
 
   // Emit the combined compare and branch instruction.
   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
@@ -2491,60 +2522,186 @@ bool AArch64FastISel::selectCmp(const Instruction *I) {
   return true;
 }
 
-bool AArch64FastISel::selectSelect(const Instruction *I) {
-  const SelectInst *SI = cast<SelectInst>(I);
+/// \brief Optimize selects of i1 if one of the operands has a 'true' or 'false'
+/// value.
+bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
+  if (!SI->getType()->isIntegerTy(1))
+    return false;
 
-  EVT DestEVT = TLI.getValueType(SI->getType(), true);
-  if (!DestEVT.isSimple())
+  const Value *Src1Val, *Src2Val;
+  unsigned Opc = 0;
+  bool NeedExtraOp = false;
+  if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
+    if (CI->isOne()) {
+      Src1Val = SI->getCondition();
+      Src2Val = SI->getFalseValue();
+      Opc = AArch64::ORRWrr;
+    } else {
+      assert(CI->isZero());
+      Src1Val = SI->getFalseValue();
+      Src2Val = SI->getCondition();
+      Opc = AArch64::BICWrr;
+    }
+  } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
+    if (CI->isOne()) {
+      Src1Val = SI->getCondition();
+      Src2Val = SI->getTrueValue();
+      Opc = AArch64::ORRWrr;
+      NeedExtraOp = true;
+    } else {
+      assert(CI->isZero());
+      Src1Val = SI->getCondition();
+      Src2Val = SI->getTrueValue();
+      Opc = AArch64::ANDWrr;
+    }
+  }
+
+  if (!Opc)
     return false;
 
-  MVT DestVT = DestEVT.getSimpleVT();
-  if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
-      DestVT != MVT::f64)
+  unsigned Src1Reg = getRegForValue(Src1Val);
+  if (!Src1Reg)
     return false;
+  bool Src1IsKill = hasTrivialKill(Src1Val);
 
-  unsigned SelectOpc;
-  const TargetRegisterClass *RC = nullptr;
-  switch (DestVT.SimpleTy) {
-  default: return false;
+  unsigned Src2Reg = getRegForValue(Src2Val);
+  if (!Src2Reg)
+    return false;
+  bool Src2IsKill = hasTrivialKill(Src2Val);
+
+  if (NeedExtraOp) {
+    Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
+    Src1IsKill = true;
+  }
+  unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32spRegClass, Src1Reg,
+                                       Src1IsKill, Src2Reg, Src2IsKill);
+  updateValueMap(SI, ResultReg);
+  return true;
+}
+
+bool AArch64FastISel::selectSelect(const Instruction *I) {
+  assert(isa<SelectInst>(I) && "Expected a select instruction.");
+  MVT VT;
+  if (!isTypeSupported(I->getType(), VT))
+    return false;
+
+  unsigned Opc;
+  const TargetRegisterClass *RC;
+  switch (VT.SimpleTy) {
+  default:
+    return false;
+  case MVT::i1:
+  case MVT::i8:
+  case MVT::i16:
   case MVT::i32:
-    SelectOpc = AArch64::CSELWr;    RC = &AArch64::GPR32RegClass; break;
+    Opc = AArch64::CSELWr;
+    RC = &AArch64::GPR32RegClass;
+    break;
   case MVT::i64:
-    SelectOpc = AArch64::CSELXr;    RC = &AArch64::GPR64RegClass; break;
+    Opc = AArch64::CSELXr;
+    RC = &AArch64::GPR64RegClass;
+    break;
   case MVT::f32:
-    SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break;
+    Opc = AArch64::FCSELSrrr;
+    RC = &AArch64::FPR32RegClass;
+    break;
   case MVT::f64:
-    SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break;
+    Opc = AArch64::FCSELDrrr;
+    RC = &AArch64::FPR64RegClass;
+    break;
   }
 
+  const SelectInst *SI = cast<SelectInst>(I);
   const Value *Cond = SI->getCondition();
-  bool NeedTest = true;
   AArch64CC::CondCode CC = AArch64CC::NE;
-  if (foldXALUIntrinsic(CC, I, Cond))
-    NeedTest = false;
+  AArch64CC::CondCode ExtraCC = AArch64CC::AL;
 
-  unsigned CondReg = getRegForValue(Cond);
-  if (!CondReg)
-    return false;
-  bool CondIsKill = hasTrivialKill(Cond);
+  if (optimizeSelect(SI))
+    return true;
 
-  if (NeedTest) {
-    unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
-    assert(ANDReg && "Unexpected AND instruction emission failure.");
-    emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
+  // Try to pickup the flags, so we don't have to emit another compare.
+  if (foldXALUIntrinsic(CC, I, Cond)) {
+    // Fake request the condition to force emission of the XALU intrinsic.
+    unsigned CondReg = getRegForValue(Cond);
+    if (!CondReg)
+      return false;
+  } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
+             isValueAvailable(Cond)) {
+    const auto *Cmp = cast<CmpInst>(Cond);
+    // Try to optimize or fold the cmp.
+    CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
+    const Value *FoldSelect = nullptr;
+    switch (Predicate) {
+    default:
+      break;
+    case CmpInst::FCMP_FALSE:
+      FoldSelect = SI->getFalseValue();
+      break;
+    case CmpInst::FCMP_TRUE:
+      FoldSelect = SI->getTrueValue();
+      break;
+    }
+
+    if (FoldSelect) {
+      unsigned SrcReg = getRegForValue(FoldSelect);
+      if (!SrcReg)
+        return false;
+      unsigned UseReg = lookUpRegForValue(SI);
+      if (UseReg)
+        MRI.clearKillFlags(UseReg);
+
+      updateValueMap(I, SrcReg);
+      return true;
+    }
+
+    // Emit the cmp.
+    if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
+      return false;
+
+    // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
+    CC = getCompareCC(Predicate);
+    switch (Predicate) {
+    default:
+      break;
+    case CmpInst::FCMP_UEQ:
+      ExtraCC = AArch64CC::EQ;
+      CC = AArch64CC::VS;
+      break;
+    case CmpInst::FCMP_ONE:
+      ExtraCC = AArch64CC::MI;
+      CC = AArch64CC::GT;
+      break;
+    }
+    assert((CC != AArch64CC::AL) && "Unexpected condition code.");
+  } else {
+    unsigned CondReg = getRegForValue(Cond);
+    if (!CondReg)
+      return false;
+    bool CondIsKill = hasTrivialKill(Cond);
+
+    // Emit a TST instruction (ANDS wzr, reg, #imm).
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDSWri),
+            AArch64::WZR)
+        .addReg(CondReg, getKillRegState(CondIsKill))
+        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
   }
 
-  unsigned TrueReg = getRegForValue(SI->getTrueValue());
-  bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
+  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
+  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
 
-  unsigned FalseReg = getRegForValue(SI->getFalseValue());
-  bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
+  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
+  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
 
-  if (!TrueReg || !FalseReg)
+  if (!Src1Reg || !Src2Reg)
     return false;
 
-  unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill,
-                                        FalseReg, FalseIsKill, CC);
+  if (ExtraCC != AArch64CC::AL) {
+    Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
+                               Src2IsKill, ExtraCC);
+    Src2IsKill = true;
+  }
+  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
+                                        Src2IsKill, CC);
   updateValueMap(I, ResultReg);
   return true;
 }
@@ -2878,6 +3035,11 @@ bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
 
     // Copy all of the result registers out of their specified physreg.
     MVT CopyVT = RVLocs[0].getValVT();
+
+    // TODO: Handle big-endian results
+    if (CopyVT.isVector() && !Subtarget->isLittleEndian())
+      return false;
+
     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
             TII.get(TargetOpcode::COPY), ResultReg)
@@ -3002,7 +3164,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
 
   // Add a register mask with the call-preserved registers.
   // Proper defs for return values will be added by setPhysRegsDeadExcept().
-  MIB.addRegMask(TRI.getCallPreservedMask(CC));
+  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
 
   CLI.Call = MIB;
 
@@ -3169,8 +3331,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     MFI->setFrameAddressIsTaken(true);
 
     const AArch64RegisterInfo *RegInfo =
-        static_cast<const AArch64RegisterInfo *>(
-            TM.getSubtargetImpl()->getRegisterInfo());
+        static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo());
     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -3294,6 +3455,32 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     updateValueMap(II, CLI.ResultReg);
     return true;
   }
+  case Intrinsic::fabs: {
+    MVT VT;
+    if (!isTypeLegal(II->getType(), VT))
+      return false;
+
+    unsigned Opc;
+    switch (VT.SimpleTy) {
+    default:
+      return false;
+    case MVT::f32:
+      Opc = AArch64::FABSSr;
+      break;
+    case MVT::f64:
+      Opc = AArch64::FABSDr;
+      break;
+    }
+    unsigned SrcReg = getRegForValue(II->getOperand(0));
+    if (!SrcReg)
+      return false;
+    bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
+    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+        .addReg(SrcReg, getKillRegState(SrcRegIsKill));
+    updateValueMap(II, ResultReg);
+    return true;
+  }
   case Intrinsic::trap: {
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
         .addImm(1);
@@ -3727,7 +3914,7 @@ unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
 
 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                      bool Op0IsKill, uint64_t Shift,
-                                     bool IsZext) {
+                                     bool IsZExt) {
   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
          "Unexpected source/return type pair.");
   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
@@ -3740,6 +3927,20 @@ unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
   unsigned RegSize = Is64Bit ? 64 : 32;
   unsigned DstBits = RetVT.getSizeInBits();
   unsigned SrcBits = SrcVT.getSizeInBits();
+  const TargetRegisterClass *RC =
+      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+  // Just emit a copy for "zero" shifts.
+  if (Shift == 0) {
+    if (RetVT == SrcVT) {
+      unsigned ResultReg = createResultReg(RC);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), ResultReg)
+          .addReg(Op0, getKillRegState(Op0IsKill));
+      return ResultReg;
+    } else
+      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
+  }
 
   // Don't deal with undefined shifts.
   if (Shift >= DstBits)
@@ -3777,9 +3978,7 @@ unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
     {AArch64::SBFMWri, AArch64::SBFMXri},
     {AArch64::UBFMWri, AArch64::UBFMXri}
   };
-  unsigned Opc = OpcTable[IsZext][Is64Bit];
-  const TargetRegisterClass *RC =
-      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+  unsigned Opc = OpcTable[IsZExt][Is64Bit];
   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
     unsigned TmpReg = MRI.createVirtualRegister(RC);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -3825,8 +4024,9 @@ unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                      bool IsZExt) {
   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
          "Unexpected source/return type pair.");
-  assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
-          SrcVT == MVT::i64) && "Unexpected source value type.");
+  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
+          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
+         "Unexpected source value type.");
   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
           RetVT == MVT::i64) && "Unexpected return value type.");
 
@@ -3834,6 +4034,20 @@ unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
   unsigned RegSize = Is64Bit ? 64 : 32;
   unsigned DstBits = RetVT.getSizeInBits();
   unsigned SrcBits = SrcVT.getSizeInBits();
+  const TargetRegisterClass *RC =
+      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+  // Just emit a copy for "zero" shifts.
+  if (Shift == 0) {
+    if (RetVT == SrcVT) {
+      unsigned ResultReg = createResultReg(RC);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), ResultReg)
+          .addReg(Op0, getKillRegState(Op0IsKill));
+      return ResultReg;
+    } else
+      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
+  }
 
   // Don't deal with undefined shifts.
   if (Shift >= DstBits)
@@ -3886,8 +4100,6 @@ unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
     {AArch64::UBFMWri, AArch64::UBFMXri}
   };
   unsigned Opc = OpcTable[IsZExt][Is64Bit];
-  const TargetRegisterClass *RC =
-      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
     unsigned TmpReg = MRI.createVirtualRegister(RC);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -3933,8 +4145,9 @@ unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                      bool IsZExt) {
   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
          "Unexpected source/return type pair.");
-  assert((SrcVT == MVT::i8 || SrcVT == MVT::i16 || SrcVT == MVT::i32 ||
-          SrcVT == MVT::i64) && "Unexpected source value type.");
+  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
+          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
+         "Unexpected source value type.");
   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
           RetVT == MVT::i64) && "Unexpected return value type.");
 
@@ -3942,6 +4155,20 @@ unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
   unsigned RegSize = Is64Bit ? 64 : 32;
   unsigned DstBits = RetVT.getSizeInBits();
   unsigned SrcBits = SrcVT.getSizeInBits();
+  const TargetRegisterClass *RC =
+      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+  // Just emit a copy for "zero" shifts.
+  if (Shift == 0) {
+    if (RetVT == SrcVT) {
+      unsigned ResultReg = createResultReg(RC);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), ResultReg)
+          .addReg(Op0, getKillRegState(Op0IsKill));
+      return ResultReg;
+    } else
+      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
+  }
 
   // Don't deal with undefined shifts.
   if (Shift >= DstBits)
@@ -3982,8 +4209,6 @@ unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
     {AArch64::UBFMWri, AArch64::UBFMXri}
   };
   unsigned Opc = OpcTable[IsZExt][Is64Bit];
-  const TargetRegisterClass *RC =
-      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
     unsigned TmpReg = MRI.createVirtualRegister(RC);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -4192,6 +4417,14 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) {
         .addImm(AArch64::sub_32);
     SrcReg = ResultReg;
   }
+  // Conservatively clear all kill flags from all uses, because we are
+  // replacing a sign-/zero-extend instruction at IR level with a nop at MI
+  // level. The result of the instruction at IR level might have been
+  // trivially dead, which is no longer true.
+  unsigned UseReg = lookUpRegForValue(I);
+  if (UseReg)
+    MRI.clearKillFlags(UseReg);
+
   updateValueMap(I, SrcReg);
   return true;
 }
@@ -4336,7 +4569,7 @@ bool AArch64FastISel::selectShift(const Instruction *I) {
     unsigned ResultReg = 0;
     uint64_t ShiftVal = C->getZExtValue();
     MVT SrcVT = RetVT;
-    bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
+    bool IsZExt = I->getOpcode() != Instruction::AShr;
     const Value *Op0 = I->getOperand(0);
     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
       if (!isIntExtFree(ZExt)) {
@@ -4559,6 +4792,28 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) {
   return true;
 }
 
+/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
+/// have to duplicate it for AArch64, because otherwise we would fail during the
+/// sign-extend emission.
+std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
+  unsigned IdxN = getRegForValue(Idx);
+  if (IdxN == 0)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return std::pair<unsigned, bool>(0, false);
+
+  bool IdxNIsKill = hasTrivialKill(Idx);
+
+  // If the index is smaller or larger than intptr_t, truncate or extend it.
+  MVT PtrVT = TLI.getPointerTy();
+  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
+  if (IdxVT.bitsLT(PtrVT)) {
+    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
+    IdxNIsKill = true;
+  } else if (IdxVT.bitsGT(PtrVT))
+    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
+  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
+}
+
 /// This is mostly a copy of the existing FastISel GEP code, but we have to
 /// duplicate it for AArch64, because otherwise we would bail out even for
 /// simple cases. This is because the standard fastEmit functions don't cover