X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86FastISel.cpp;h=83f2f6a202bc3f820d92edede504275b5bff4aaa;hb=96bd4418b2320dec7cf9573c4f1da0a0ef31465e;hp=32f177047cc01f337447865f99d0efff69903161;hpb=e4824714026a528fe4cb08ad73e048066980eda6;p=oota-llvm.git

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 32f177047cc..83f2f6a202b 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -57,11 +57,13 @@ class X86FastISel : public FastISel {
   bool X86ScalarSSEf32;
 
 public:
-  explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
+  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
+                       const TargetLibraryInfo *libInfo)
+    : FastISel(funcInfo, libInfo) {
     Subtarget = &TM.getSubtarget();
     StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
-    X86ScalarSSEf64 = Subtarget->hasSSE2() || Subtarget->hasAVX();
-    X86ScalarSSEf32 = Subtarget->hasSSE1() || Subtarget->hasAVX();
+    X86ScalarSSEf64 = Subtarget->hasSSE2();
+    X86ScalarSSEf32 = Subtarget->hasSSE1();
   }
 
   virtual bool TargetSelectInstruction(const Instruction *I);
@@ -155,9 +157,9 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
   // For now, require SSE/SSE2 for performing floating-point operations,
   // since x87 requires additional work.
   if (VT == MVT::f64 && !X86ScalarSSEf64)
-    return false;
+    return false;
   if (VT == MVT::f32 && !X86ScalarSSEf32)
-    return false;
+    return false;
   // Similarly, no f80 support yet.
   if (VT == MVT::f80)
     return false;
@@ -183,37 +185,37 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
   case MVT::i1:
   case MVT::i8:
     Opc = X86::MOV8rm;
-    RC = X86::GR8RegisterClass;
+    RC = &X86::GR8RegClass;
     break;
   case MVT::i16:
     Opc = X86::MOV16rm;
-    RC = X86::GR16RegisterClass;
+    RC = &X86::GR16RegClass;
     break;
   case MVT::i32:
     Opc = X86::MOV32rm;
-    RC = X86::GR32RegisterClass;
+    RC = &X86::GR32RegClass;
     break;
   case MVT::i64:
    // Must be in x86-64 mode.
     Opc = X86::MOV64rm;
-    RC = X86::GR64RegisterClass;
+    RC = &X86::GR64RegClass;
     break;
   case MVT::f32:
     if (X86ScalarSSEf32) {
       Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
-      RC = X86::FR32RegisterClass;
+      RC = &X86::FR32RegClass;
     } else {
       Opc = X86::LD_Fp32m;
-      RC = X86::RFP32RegisterClass;
+      RC = &X86::RFP32RegClass;
     }
     break;
   case MVT::f64:
     if (X86ScalarSSEf64) {
       Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
-      RC = X86::FR64RegisterClass;
+      RC = &X86::FR64RegClass;
     } else {
       Opc = X86::LD_Fp64m;
-      RC = X86::RFP64RegisterClass;
+      RC = &X86::RFP64RegClass;
     }
     break;
   case MVT::f80:
@@ -240,7 +242,7 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
   default: return false;
   case MVT::i1: {
     // Mask out all but lowest bit.
-    unsigned AndResult = createResultReg(X86::GR8RegisterClass);
+    unsigned AndResult = createResultReg(&X86::GR8RegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
             TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
     Val = AndResult;
@@ -543,17 +545,17 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
       StubAM.GVOpFlags = GVFlags;
 
       // Prepare for inserting code in the local-value area.
-      SavePoint SaveInsertPt = enterLocalValueArea();
+      MachineBasicBlock::iterator SaveIter = enterLocalValueArea();
 
       if (TLI.getPointerTy() == MVT::i64) {
         Opc = X86::MOV64rm;
-        RC = X86::GR64RegisterClass;
+        RC = &X86::GR64RegClass;
 
         if (Subtarget->isPICStyleRIPRel())
           StubAM.Base.Reg = X86::RIP;
       } else {
         Opc = X86::MOV32rm;
-        RC = X86::GR32RegisterClass;
+        RC = &X86::GR32RegClass;
       }
 
       LoadReg = createResultReg(RC);
@@ -562,7 +564,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
       addFullAddress(LoadMI, StubAM);
 
       // Ok, back to normal mode.
-      leaveLocalValueArea(SaveInsertPt);
+      leaveLocalValueArea(SaveIter);
 
       // Prevent loading GV stub multiple times in same MBB.
       LocalValueMap[V] = LoadReg;
@@ -728,7 +730,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
 
   // fastcc with -tailcallopt is intended to provide a guaranteed
   // tail call optimization. Fastisel doesn't know how to do that.
-  if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
     return false;
 
   // Let SDISel handle vararg functions.
@@ -743,7 +745,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector ValLocs;
   CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
-                 I->getContext());
+                 I->getContext());
   CCInfo.AnalyzeReturn(Outs, RetCC_X86);
 
   const Value *RV = Ret->getOperand(0);
@@ -837,8 +839,8 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
 
 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
   bool HasAVX = Subtarget->hasAVX();
-  bool X86ScalarSSEf32 = HasAVX || Subtarget->hasSSE1();
-  bool X86ScalarSSEf64 = HasAVX || Subtarget->hasSSE2();
+  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
+  bool X86ScalarSSEf64 = Subtarget->hasSSE2();
 
   switch (VT.getSimpleVT().SimpleTy) {
   default: return 0;
@@ -1258,7 +1260,7 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) {
     if (V->getType()->isFloatTy()) {
       unsigned OpReg = getRegForValue(V);
       if (OpReg == 0) return false;
-      unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
+      unsigned ResultReg = createResultReg(&X86::FR64RegClass);
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
               TII.get(X86::CVTSS2SDrr), ResultReg)
         .addReg(OpReg);
@@ -1277,7 +1279,7 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
     if (V->getType()->isDoubleTy()) {
       unsigned OpReg = getRegForValue(V);
       if (OpReg == 0) return false;
-      unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
+      unsigned ResultReg = createResultReg(&X86::FR32RegClass);
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
               TII.get(X86::CVTSD2SSrr), ResultReg)
         .addReg(OpReg);
@@ -1314,8 +1316,9 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
   if (!Subtarget->is64Bit()) {
     // If we're on x86-32; we can't extract an i8 from a general register.
     // First issue a copy to GR16_ABCD or GR32_ABCD.
-    const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
-      ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
+    const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ?
+      (const TargetRegisterClass*)&X86::GR16_ABCDRegClass :
+      (const TargetRegisterClass*)&X86::GR32_ABCDRegClass;
     unsigned CopyReg = createResultReg(CopyRC);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
             CopyReg).addReg(InputReg);
@@ -1423,7 +1426,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
     return DoSelectCall(&I, "memset");
   }
   case Intrinsic::stackprotector: {
-    // Emit code inline code to store the stack guard onto the stack.
+    // Emit code to store the stack guard onto the stack.
     EVT PtrTy = TLI.getPointerTy();
 
     const Value *Op1 = I.getArgOperand(0); // The guard's value.
@@ -1484,7 +1487,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
       return false;
 
     // The call to CreateRegs builds two sequential registers, to store the
-    // both the the returned values.
+    // both the returned values.
    unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
       .addReg(Reg1).addReg(Reg2);
@@ -1515,6 +1518,22 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
   return DoSelectCall(I, 0);
 }
 
+static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
+                                           const ImmutableCallSite &CS) {
+  if (Subtarget.is64Bit())
+    return 0;
+  if (Subtarget.isTargetWindows())
+    return 0;
+  CallingConv::ID CC = CS.getCallingConv();
+  if (CC == CallingConv::Fast || CC == CallingConv::GHC)
+    return 0;
+  if (!CS.paramHasAttr(1, Attribute::StructRet))
+    return 0;
+  if (CS.paramHasAttr(1, Attribute::InReg))
+    return 0;
+  return 4;
+}
+
 // Select either a call, or an llvm.memcpy/memmove/memset intrinsic
 bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
   const CallInst *CI = cast(I);
@@ -1529,7 +1548,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
 
   // fastcc with -tailcallopt is intended to provide a guaranteed
   // tail call optimization. Fastisel doesn't know how to do that.
-  if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
     return false;
 
   PointerType *PT = cast(CS.getCalledValue()->getType());
@@ -1543,17 +1562,16 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
 
   // Fast-isel doesn't know about callee-pop yet.
   if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
-                       GuaranteedTailCallOpt))
+                       TM.Options.GuaranteedTailCallOpt))
     return false;
 
   // Check whether the function can return without sret-demotion.
   SmallVector Outs;
-  SmallVector Offsets;
   GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(),
-                Outs, TLI, &Offsets);
+                Outs, TLI);
   bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
-                                           *FuncInfo.MF, FTy->isVarArg(),
-                                           Outs, FTy->getContext());
+                                           *FuncInfo.MF, FTy->isVarArg(),
+                                           Outs, FTy->getContext());
 
   if (!CanLowerReturn)
     return false;
@@ -1576,10 +1594,11 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
   SmallVector Args;
   SmallVector ArgVTs;
   SmallVector ArgFlags;
-  Args.reserve(CS.arg_size());
-  ArgVals.reserve(CS.arg_size());
-  ArgVTs.reserve(CS.arg_size());
-  ArgFlags.reserve(CS.arg_size());
+  unsigned arg_size = CS.arg_size();
+  Args.reserve(arg_size);
+  ArgVals.reserve(arg_size);
+  ArgVTs.reserve(arg_size);
+  ArgFlags.reserve(arg_size);
   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
        i != e; ++i) {
     // If we're lowering a mem intrinsic instead of a regular call, skip the
@@ -1666,7 +1685,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector ArgLocs;
   CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
-                 I->getParent()->getContext());
+                 I->getParent()->getContext());
 
   // Allocate shadow area for Win64
   if (Subtarget->isTargetWin64())
@@ -1692,7 +1711,6 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
 
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
-    default: llvm_unreachable("Unknown loc info!");
     case CCValAssign::Full: break;
     case CCValAssign::SExt: {
       assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
@@ -1736,6 +1754,14 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
       ArgVT = VA.getLocVT();
       break;
     }
+    case CCValAssign::VExt:
+      // VExt has not been implemented, so this should be impossible to reach
+      // for now. However, fallback to Selection DAG isel once implemented.
+      return false;
+    case CCValAssign::Indirect:
+      // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
+      // support this.
+      return false;
     }
 
     if (VA.isRegLoc()) {
@@ -1778,7 +1804,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
 
   if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
     // Count the number of XMM registers allocated.
-    static const unsigned XMMArgRegs[] = {
+    static const uint16_t XMMArgRegs[] = {
       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
     };
@@ -1792,9 +1818,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
   if (CalleeOp) {
     // Register-indirect call.
     unsigned CallOpc;
-    if (Subtarget->isTargetWin64())
-      CallOpc = X86::WINCALL64r;
-    else if (Subtarget->is64Bit())
+    if (Subtarget->is64Bit())
       CallOpc = X86::CALL64r;
     else
       CallOpc = X86::CALL32r;
@@ -1805,9 +1829,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
     // Direct call.
     assert(GV && "Not a direct call");
     unsigned CallOpc;
-    if (Subtarget->isTargetWin64())
-      CallOpc = X86::WINCALL64pcrel32;
-    else if (Subtarget->is64Bit())
+    if (Subtarget->is64Bit())
       CallOpc = X86::CALL64pcrel32;
     else
       CallOpc = X86::CALLpcrel32;
@@ -1841,22 +1863,24 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
     MIB.addGlobalAddress(GV, 0, OpFlags);
   }
 
+  // Add a register mask with the call-preserved registers.
+  // Proper defs for return values will be added by setPhysRegsDeadExcept().
+  MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
+
   // Add an implicit use GOT pointer in EBX.
   if (Subtarget->isPICStyleGOT())
-    MIB.addReg(X86::EBX);
+    MIB.addReg(X86::EBX, RegState::Implicit);
 
   if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
-    MIB.addReg(X86::AL);
+    MIB.addReg(X86::AL, RegState::Implicit);
 
   // Add implicit physical register uses to the call.
   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
-    MIB.addReg(RegArgs[i]);
+    MIB.addReg(RegArgs[i], RegState::Implicit);
 
   // Issue CALLSEQ_END
   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
-  unsigned NumBytesCallee = 0;
-  if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet))
-    NumBytesCallee = 4;
+  const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS);
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
     .addImm(NumBytes).addImm(NumBytesCallee);
@@ -1887,7 +1911,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
   SmallVector UsedRegs;
   SmallVector RVLocs;
   CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
-                    I->getParent()->getContext());
+                    I->getParent()->getContext());
   unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
   CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -1901,7 +1925,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
         RVLocs[i].getLocReg() == X86::ST1)) {
       if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
         CopyVT = MVT::f80;
-        CopyReg = createResultReg(X86::RFP80RegisterClass);
+        CopyReg = createResultReg(&X86::RFP80RegClass);
       }
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL),
               CopyReg);
@@ -1990,51 +2014,55 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
 unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
   MVT VT;
   if (!isTypeLegal(C->getType(), VT))
-    return false;
+    return 0;
+
+  // Can't handle alternate code models yet.
+  if (TM.getCodeModel() != CodeModel::Small)
+    return 0;
 
   // Get opcode and regclass of the output for the given load instruction.
   unsigned Opc = 0;
   const TargetRegisterClass *RC = NULL;
   switch (VT.SimpleTy) {
-  default: return false;
+  default: return 0;
   case MVT::i8:
     Opc = X86::MOV8rm;
-    RC = X86::GR8RegisterClass;
+    RC = &X86::GR8RegClass;
     break;
   case MVT::i16:
     Opc = X86::MOV16rm;
-    RC = X86::GR16RegisterClass;
+    RC = &X86::GR16RegClass;
     break;
   case MVT::i32:
     Opc = X86::MOV32rm;
-    RC = X86::GR32RegisterClass;
+    RC = &X86::GR32RegClass;
     break;
   case MVT::i64:
     // Must be in x86-64 mode.
     Opc = X86::MOV64rm;
-    RC = X86::GR64RegisterClass;
+    RC = &X86::GR64RegClass;
     break;
   case MVT::f32:
     if (X86ScalarSSEf32) {
       Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
-      RC = X86::FR32RegisterClass;
+      RC = &X86::FR32RegClass;
     } else {
       Opc = X86::LD_Fp32m;
-      RC = X86::RFP32RegisterClass;
+      RC = &X86::RFP32RegClass;
     }
     break;
   case MVT::f64:
     if (X86ScalarSSEf64) {
       Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
-      RC = X86::FR64RegisterClass;
+      RC = &X86::FR64RegClass;
     } else {
       Opc = X86::LD_Fp64m;
-      RC = X86::RFP64RegisterClass;
+      RC = &X86::RFP64RegClass;
     }
     break;
   case MVT::f80:
     // No f80 support yet.
-    return false;
+    return 0;
   }
 
   // Materialize addresses with LEA instructions.
@@ -2102,7 +2130,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
   if (!X86SelectAddress(C, AM))
     return 0;
   unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
-  TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+  const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
   unsigned ResultReg = createResultReg(RC);
   addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg), AM);
@@ -2118,28 +2146,28 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
   unsigned Opc = 0;
   const TargetRegisterClass *RC = NULL;
   switch (VT.SimpleTy) {
-    default: return false;
-    case MVT::f32:
-      if (X86ScalarSSEf32) {
-        Opc = Subtarget->hasAVX() ? X86::VFsFLD0SS : X86::FsFLD0SS;
-        RC = X86::FR32RegisterClass;
-      } else {
-        Opc = X86::LD_Fp032;
-        RC = X86::RFP32RegisterClass;
-      }
-      break;
-    case MVT::f64:
-      if (X86ScalarSSEf64) {
-        Opc = Subtarget->hasAVX() ? X86::VFsFLD0SD : X86::FsFLD0SD;
-        RC = X86::FR64RegisterClass;
-      } else {
-        Opc = X86::LD_Fp064;
-        RC = X86::RFP64RegisterClass;
-      }
-      break;
-    case MVT::f80:
-      // No f80 support yet.
-      return false;
+  default: return false;
+  case MVT::f32:
+    if (X86ScalarSSEf32) {
+      Opc = X86::FsFLD0SS;
+      RC = &X86::FR32RegClass;
+    } else {
+      Opc = X86::LD_Fp032;
+      RC = &X86::RFP32RegClass;
+    }
+    break;
+  case MVT::f64:
+    if (X86ScalarSSEf64) {
+      Opc = X86::FsFLD0SD;
+      RC = &X86::FR64RegClass;
+    } else {
+      Opc = X86::LD_Fp064;
+      RC = &X86::RFP64RegClass;
+    }
+    break;
+  case MVT::f80:
+    // No f80 support yet.
+    return false;
   }
 
   unsigned ResultReg = createResultReg(RC);
@@ -2158,7 +2186,7 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
   if (!X86SelectAddress(LI->getOperand(0), AM))
     return false;
 
-  X86InstrInfo &XII = (X86InstrInfo&)TII;
+  const X86InstrInfo &XII = (const X86InstrInfo&)TII;
 
   unsigned Size = TD.getTypeAllocSize(LI->getType());
   unsigned Alignment = LI->getAlignment();
@@ -2177,7 +2205,8 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
 
 
 namespace llvm {
-  llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
-    return new X86FastISel(funcInfo);
+  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
+                                const TargetLibraryInfo *libInfo) {
+    return new X86FastISel(funcInfo, libInfo);
   }
 }