X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FAArch64%2FAArch64FastISel.cpp;h=0ac4b39b0357245ace6fb3e5c7c6603d5a89f77a;hb=d7696fd2cb39a434f131b0c8d9d2f43892d253eb;hp=322dce662abd092040112d447398fd0832b909a2;hpb=84bbcfe2009eafcde83fb7a7f54b7d1aad46f52a;p=oota-llvm.git diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 322dce662ab..0ac4b39b035 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -969,7 +969,7 @@ bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { // Cannot encode an offset register and an immediate offset in the same // instruction. Fold the immediate offset into the load/store instruction and - // emit an additonal add to take care of the offset register. + // emit an additional add to take care of the offset register. if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg()) RegisterOffsetNeedsLowering = true; @@ -1058,8 +1058,8 @@ void AArch64FastISel::addLoadStoreOperands(Address &Addr, // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size // and alignment should be based on the VT. MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI, Offset), Flags, - MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); + MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); // Now add the rest of the operands. MIB.addFrameIndex(FI).addImm(Offset); } else { @@ -1178,7 +1178,7 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, } // Check if the mul can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (isMulPowOf2(RHS)) { const Value *MulLHS = cast(RHS)->getOperand(0); const Value *MulRHS = cast(RHS)->getOperand(1); @@ -1193,12 +1193,16 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(MulLHS); - return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, - AArch64_AM::LSL, ShiftVal, SetFlags, WantResult); + ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags, + WantResult); + if (ResultReg) + return ResultReg; } + } // Check if the shift can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (const auto *SI = dyn_cast(RHS)) { if (const auto *C = dyn_cast(SI->getOperand(1))) { AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; @@ -1214,12 +1218,15 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); - return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftType, ShiftVal, SetFlags, - WantResult); + ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftType, ShiftVal, SetFlags, + WantResult); + if (ResultReg) + return ResultReg; } } } + } unsigned RHSReg = getRegForValue(RHS); if (!RHSReg) @@ -1323,6 +1330,10 @@ unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; + // Don't deal with undefined shifts. + if (ShiftImm >= RetVT.getSizeInBits()) + return 0; + static const unsigned OpcTable[2][2][2] = { { { AArch64::SUBWrs, AArch64::SUBXrs }, { AArch64::ADDWrs, AArch64::ADDXrs } }, @@ -1360,6 +1371,9 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; + if (ShiftImm >= 4) + return 0; + static const unsigned OpcTable[2][2][2] = { { { AArch64::SUBWrx, AArch64::SUBXrx }, { AArch64::ADDWrx, AArch64::ADDXrx } }, @@ -1542,7 +1556,7 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, return ResultReg; // Check if the mul can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (isMulPowOf2(RHS)) { const Value *MulLHS = cast(RHS)->getOperand(0); const Value *MulRHS = cast(RHS)->getOperand(1); @@ -1558,12 +1572,15 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(MulLHS); - return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftVal); + ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftVal); + if (ResultReg) + return ResultReg; } + } // Check if the shift can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (const auto *SI = dyn_cast(RHS)) if (const auto *C = dyn_cast(SI->getOperand(1))) { uint64_t ShiftVal = C->getZExtValue(); @@ -1571,9 +1588,12 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); - return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftVal); + ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftVal); + if (ResultReg) + return ResultReg; } + } unsigned RHSReg = getRegForValue(RHS); if (!RHSReg) @@ -1646,6 +1666,11 @@ unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, { AArch64::ORRWrs, AArch64::ORRXrs }, { AArch64::EORWrs, AArch64::EORXrs } }; + + // Don't deal with undefined shifts. + if (ShiftImm >= RetVT.getSizeInBits()) + return 0; + const TargetRegisterClass *RC; unsigned Opc; switch (RetVT.SimpleTy) { @@ -2235,14 +2260,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { MIB.addImm(TestBit); MIB.addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - fastEmitBranch(FBB, DbgLoc); - + finishCondBranch(BI->getParent(), TBB, FBB); return true; } @@ -2257,7 +2275,6 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - AArch64CC::CondCode CC = AArch64CC::NE; if (const CmpInst *CI = dyn_cast(BI->getCondition())) { if (CI->hasOneUse() && isValueAvailable(CI)) { // Try to optimize or fold the cmp. @@ -2289,7 +2306,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch // instruction. - CC = getCompareCC(Predicate); + AArch64CC::CondCode CC = getCompareCC(Predicate); AArch64CC::CondCode ExtraCC = AArch64CC::AL; switch (Predicate) { default: @@ -2317,52 +2334,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { .addImm(CC) .addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); - return true; - } - } else if (TruncInst *TI = dyn_cast(BI->getCondition())) { - MVT SrcVT; - if (TI->hasOneUse() && isValueAvailable(TI) && - isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) { - unsigned CondReg = getRegForValue(TI->getOperand(0)); - if (!CondReg) - return false; - bool CondIsKill = hasTrivialKill(TI->getOperand(0)); - - // Issue an extract_subreg to get the lower 32-bits. - if (SrcVT == MVT::i64) { - CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill, - AArch64::sub_32); - CondIsKill = true; - } - - unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1); - assert(ANDReg && "Unexpected AND instruction emission failure."); - emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); - - if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { - std::swap(TBB, FBB); - CC = AArch64CC::EQ; - } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); - - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } } else if (const auto *CI = dyn_cast(BI->getCondition())) { @@ -2371,34 +2343,31 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) .addMBB(Target); - // Obtain the branch weight and add the target to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - Target->getBasicBlock()); - FuncInfo.MBB->addSuccessor(Target, BranchWeight); + // Obtain the branch probability and add the target to the successor list. + if (FuncInfo.BPI) { + auto BranchProbability = FuncInfo.BPI->getEdgeProbability( + BI->getParent(), Target->getBasicBlock()); + FuncInfo.MBB->addSuccessor(Target, BranchProbability); + } else + FuncInfo.MBB->addSuccessorWithoutProb(Target); return true; - } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) { - // Fake request the condition, otherwise the intrinsic might be completely - // optimized away. - unsigned CondReg = getRegForValue(BI->getCondition()); - if (!CondReg) - return false; - - // Emit the branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); + } else { + AArch64CC::CondCode CC = AArch64CC::NE; + if (foldXALUIntrinsic(CC, I, BI->getCondition())) { + // Fake request the condition, otherwise the intrinsic might be completely + // optimized away. + unsigned CondReg = getRegForValue(BI->getCondition()); + if (!CondReg) + return false; - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); + // Emit the branch. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) + .addImm(CC) + .addMBB(TBB); - fastEmitBranch(FBB, DbgLoc); - return true; + finishCondBranch(BI->getParent(), TBB, FBB); + return true; + } } unsigned CondReg = getRegForValue(BI->getCondition()); @@ -2406,32 +2375,22 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { return false; bool CondRegIsKill = hasTrivialKill(BI->getCondition()); - // We've been divorced from our compare! Our block was split, and - // now our compare lives in a predecessor block. We musn't - // re-compare here, as the children of the compare aren't guaranteed - // live across the block boundary (we *could* check for this). - // Regardless, the compare has been done in the predecessor block, - // and it left a value for us in a virtual register. Ergo, we test - // the one-bit value left in the virtual register. - emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0); - + // i1 conditions come as i32 values, test the lowest bit with tb(n)z. + unsigned Opcode = AArch64::TBNZW; if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); - CC = AArch64CC::EQ; + Opcode = AArch64::TBZW; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) + const MCInstrDesc &II = TII.get(Opcode); + unsigned ConstrainedCondReg + = constrainOperandRegClass(II, CondReg, II.getNumDefs()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) + .addImm(0) .addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } @@ -2447,8 +2406,8 @@ bool AArch64FastISel::selectIndirectBr(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); // Make sure the CFG is up-to-date. - for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i) - FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]); + for (auto *Succ : BI->successors()) + FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); return true; } @@ -2456,6 +2415,10 @@ bool AArch64FastISel::selectIndirectBr(const Instruction *I) { bool AArch64FastISel::selectCmp(const Instruction *I) { const CmpInst *CI = cast(I); + // Vectors of i1 are weird: bail out. + if (CI->getType()->isVectorTy()) + return false; + // Try to optimize or fold the cmp. CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); unsigned ResultReg = 0; @@ -2954,8 +2917,7 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, .addImm(NumBytes); // Process the args. - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; + for (CCValAssign &VA : ArgLocs) { const Value *ArgVal = CLI.OutVals[VA.getValNo()]; MVT ArgVT = OutVTs[VA.getValNo()]; @@ -3018,8 +2980,8 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getStack(Addr.getOffset()), - MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); + MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), + MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); if (!emitStore(ArgVT, ArgReg, Addr, MMO)) return false; @@ -3318,8 +3280,8 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, return false; // Make sure nothing is in the way - BasicBlock::const_iterator Start = I; - BasicBlock::const_iterator End = II; + BasicBlock::const_iterator Start(I); + BasicBlock::const_iterator End(II); for (auto Itr = std::prev(Start); Itr != End; --Itr) { // We only expect extractvalue instructions between the intrinsic and the // instruction to be selected. @@ -3684,6 +3646,9 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (F.isVarArg()) return false; + if (TLI.supportSplitCSR(FuncInfo.MF)) + return false; + // Build a list of return value registers. SmallVector RetRegs; @@ -3763,8 +3728,8 @@ bool AArch64FastISel::selectRet(const Instruction *I) { MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::RET_ReallyLR)); - for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) - MIB.addReg(RetRegs[i], RegState::Implicit); + for (unsigned RetReg : RetRegs) + MIB.addReg(RetReg, RegState::Implicit); return true; } @@ -3795,34 +3760,41 @@ bool AArch64FastISel::selectTrunc(const Instruction *I) { return false; bool SrcIsKill = hasTrivialKill(Op); - // If we're truncating from i64/i32 to a smaller non-legal type then generate - // an AND. - uint64_t Mask = 0; - switch (DestVT.SimpleTy) { - default: - // Trunc i64 to i32 is handled by the target-independent fast-isel. - return false; - case MVT::i1: - Mask = 0x1; - break; - case MVT::i8: - Mask = 0xff; - break; - case MVT::i16: - Mask = 0xffff; - break; - } + // If we're truncating from i64 to a smaller non-legal type then generate an + // AND. Otherwise, we know the high bits are undefined and a truncate only + // generate a COPY. We cannot mark the source register also as result + // register, because this can incorrectly transfer the kill flag onto the + // source register. + unsigned ResultReg; if (SrcVT == MVT::i64) { + uint64_t Mask = 0; + switch (DestVT.SimpleTy) { + default: + // Trunc i64 to i32 is handled by the target-independent fast-isel. + return false; + case MVT::i1: + Mask = 0x1; + break; + case MVT::i8: + Mask = 0xff; + break; + case MVT::i16: + Mask = 0xffff; + break; + } // Issue an extract_subreg to get the lower 32-bits. - SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, - AArch64::sub_32); - SrcIsKill = true; + unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, + AArch64::sub_32); + // Create the AND instruction which performs the actual truncation. + ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask); + assert(ResultReg && "Unexpected AND instruction emission failure."); + } else { + ResultReg = createResultReg(&AArch64::GPR32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(SrcReg, getKillRegState(SrcIsKill)); } - // Create the AND instruction which performs the actual truncation. - unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, SrcIsKill, Mask); - assert(ResultReg && "Unexpected AND instruction emission failure."); - updateValueMap(I, ResultReg); return true; }