virtual bool TargetSelectInstruction(const Instruction *I);
+ /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+ /// vreg is being provided by the specified load instruction. If possible,
+ /// try to fold the load as an operand to the instruction, returning true if
+ /// possible.
+ virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
+
#include "X86GenFastISel.inc"
private:
bool X86VisitIntrinsicCall(const IntrinsicInst &I);
bool X86SelectCall(const Instruction *I);
- CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);
-
const X86InstrInfo *getInstrInfo() const {
return getTargetMachine()->getInstrInfo();
}
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
- bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false);
+ bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
};
} // end anonymous namespace.
-bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
- VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
- if (VT == MVT::Other || !VT.isSimple())
+bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
+ EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
+ if (evt == MVT::Other || !evt.isSimple())
// Unhandled type. Halt "fast" selection and bail.
return false;
-
+
+ VT = evt.getSimpleVT();
// For now, require SSE/SSE2 for performing floating-point operations,
// since x87 requires additional work.
if (VT == MVT::f64 && !X86ScalarSSEf64)
#include "X86GenCallingConv.inc"
-/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
-/// convention.
-CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
- bool isTaillCall) {
- if (Subtarget->is64Bit()) {
- if (CC == CallingConv::GHC)
- return CC_X86_64_GHC;
- else if (Subtarget->isTargetWin64())
- return CC_X86_Win64_C;
- else
- return CC_X86_64_C;
- }
-
- if (CC == CallingConv::X86_FastCall)
- return CC_X86_32_FastCall;
- else if (CC == CallingConv::X86_ThisCall)
- return CC_X86_32_ThisCall;
- else if (CC == CallingConv::Fast)
- return CC_X86_32_FastCC;
- else if (CC == CallingConv::GHC)
- return CC_X86_32_GHC;
- else
- return CC_X86_32_C;
-}
-
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
// Don't walk into other basic blocks; it's possible we haven't
// visited them yet, so the instructions may not yet be assigned
// virtual registers.
- if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
- return false;
-
- Opcode = I->getOpcode();
- U = I;
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
Opcode = C->getOpcode();
U = C;
StubAM.GV = GV;
StubAM.GVOpFlags = GVFlags;
+ // Prepare for inserting code in the local-value area.
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
if (TLI.getPointerTy() == MVT::i64) {
Opc = X86::MOV64rm;
RC = X86::GR64RegisterClass;
}
LoadReg = createResultReg(RC);
- addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DL, TII.get(Opc), LoadReg), StubAM);
-
+ MachineInstrBuilder LoadMI =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
+ addFullAddress(LoadMI, StubAM);
+
+ // Ok, back to normal mode.
+ leaveLocalValueArea(SaveInsertPt);
+
// Prevent loading GV stub multiple times in same MBB.
LocalValueMap[V] = LoadReg;
}
/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
- EVT VT;
+ MVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
return false;
if (Subtarget->isTargetWin64())
return false;
+ // Don't handle popping bytes on return for now.
+ if (FuncInfo.MF->getInfo<X86MachineFunctionInfo>()
+ ->getBytesToPopOnReturn() != 0)
+ return 0;
+
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
if (F.isVarArg())
return false;
- SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
- Outs, TLI);
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
- CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC));
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_X86);
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
- // Copy the return value into registers.
- for (unsigned i = 0, e = ValLocs.size(); i != e; ++i) {
- CCValAssign &VA = ValLocs[i];
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
// Don't bother handling odd stuff for now.
if (VA.getLocInfo() != CCValAssign::Full)
return false;
+ // Only handle register returns for now.
if (!VA.isRegLoc())
return false;
+ // TODO: For now, don't try to handle cases where getLocInfo()
+ // says Full but the types don't match.
+ if (TLI.getValueType(RV->getType()) != VA.getValVT())
+ return false;
- const Value *RV = Ret->getOperand(VA.getValNo());
- unsigned Reg = getRegForValue(RV);
+ // The calling-convention tables for x87 returns don't tell
+ // the whole story.
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ return false;
- TargetRegisterClass* RC = TLI.getRegClassFor(VA.getValVT());
- bool Emitted = TII.copyRegToReg(*FuncInfo.MBB, FuncInfo.InsertPt,
- VA.getLocReg(), Reg, RC, RC, DL);
- assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
+ // Make the copy.
+ unsigned SrcReg = Reg + VA.getValNo();
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ DstReg).addReg(SrcReg);
- MRI.addLiveOut(X86::XMM0);
+ // Mark the register as live out of the function.
+ MRI.addLiveOut(VA.getLocReg());
}
// Now emit the RET.
/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
- EVT VT;
+ MVT VT;
if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
return false;
return false;
}
-static unsigned X86ChooseCmpOpcode(EVT VT) {
+static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
switch (VT.getSimpleVT().SimpleTy) {
default: return 0;
case MVT::i8: return X86::CMP8rr;
case MVT::i16: return X86::CMP16rr;
case MVT::i32: return X86::CMP32rr;
case MVT::i64: return X86::CMP64rr;
- case MVT::f32: return X86::UCOMISSrr;
- case MVT::f64: return X86::UCOMISDrr;
+ case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0;
+ case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0;
}
}
}
}
- unsigned CompareOpc = X86ChooseCmpOpcode(VT);
+ unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
if (CompareOpc == 0) return false;
unsigned Op1Reg = getRegForValue(Op1);
bool X86FastISel::X86SelectCmp(const Instruction *I) {
const CmpInst *CI = cast<CmpInst>(I);
- EVT VT;
+ MVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT))
return false;
MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
- // Fold the common case of a conditional branch with a comparison.
+ // Fold the common case of a conditional branch with a comparison
+ // in the same block (values defined on other blocks may not have
+ // initialized registers).
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
- if (CI->hasOneUse()) {
+ if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
// Try to take advantage of fallthrough opportunities.
const MachineInstr &MI = *RI;
if (MI.definesRegister(Reg)) {
- unsigned Src, Dst, SrcSR, DstSR;
-
- if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
- Reg = Src;
+ if (MI.isCopy()) {
+ Reg = MI.getOperand(1).getReg();
continue;
}
return false;
}
- EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
- if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
return false;
unsigned Op0Reg = getRegForValue(I->getOperand(0));
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
- TII.copyRegToReg(*FuncInfo.MBB, FuncInfo.InsertPt,
- CReg, Op1Reg, RC, RC, DL);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CReg).addReg(Op1Reg);
// The shift instruction uses X86::CL. If we defined a super-register
- // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
- // we're doing here.
+ // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
if (CReg != X86::CL)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL)
- .addReg(CReg).addImm(X86::sub_8bit);
+ TII.get(TargetOpcode::KILL), X86::CL)
+ .addReg(CReg, RegState::Kill);
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg)
}
bool X86FastISel::X86SelectSelect(const Instruction *I) {
- EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
- if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
return false;
+ // We only use cmov here, if we don't have a cmov instruction bail.
+ if (!Subtarget->hasCMov()) return false;
+
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
- if (VT.getSimpleVT() == MVT::i16) {
+ if (VT == MVT::i16) {
Opc = X86::CMOVE16rr;
RC = &X86::GR16RegClass;
- } else if (VT.getSimpleVT() == MVT::i32) {
+ } else if (VT == MVT::i32) {
Opc = X86::CMOVE32rr;
RC = &X86::GR32RegClass;
- } else if (VT.getSimpleVT() == MVT::i64) {
+ } else if (VT == MVT::i64) {
Opc = X86::CMOVE64rr;
RC = &X86::GR64RegClass;
} else {
return false;
// First issue a copy to GR16_ABCD or GR32_ABCD.
- unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
unsigned CopyReg = createResultReg(CopyRC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CopyOpc), CopyReg)
- .addReg(InputReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(InputReg);
// Then issue an extract_subreg.
unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
assert(CI && "Non-constant type in Intrinsic::objectsize?");
- EVT VT;
+ MVT VT;
if (!isTypeLegal(Ty, VT))
return false;
const Type *RetTy =
cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
- EVT VT;
+ MVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
// Handle *simple* calls for now.
const Type *RetTy = CS.getType();
- EVT RetVT;
+ MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
else if (!isTypeLegal(RetTy, RetVT, true))
// Deal with call operands first.
SmallVector<const Value *, 8> ArgVals;
SmallVector<unsigned, 8> Args;
- SmallVector<EVT, 8> ArgVTs;
+ SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
Args.reserve(CS.arg_size());
ArgVals.reserve(CS.arg_size());
return false;
const Type *ArgTy = (*i)->getType();
- EVT ArgVT;
+ MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT))
return false;
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
CCInfo.AllocateStack(32, 8);
}
- CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
break;
}
case CCValAssign::AExt: {
+ // We don't handle MMX parameters yet.
+ if (VA.getLocVT().isVector() && VA.getLocVT().getSizeInBits() == 128)
+ return false;
bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
if (!Emitted)
break;
}
case CCValAssign::BCvt: {
- unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(),
+ unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false);
assert(BC != 0 && "Failed to emit a bitcast!");
Arg = BC;
}
if (VA.isRegLoc()) {
- TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
- bool Emitted = TII.copyRegToReg(*FuncInfo.MBB, FuncInfo.InsertPt,
- VA.getLocReg(), Arg, RC, RC, DL);
- assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
- Emitted = true;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ VA.getLocReg()).addReg(Arg);
RegArgs.push_back(VA.getLocReg());
} else {
unsigned LocMemOffset = VA.getLocMemOffset();
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
if (Subtarget->isPICStyleGOT()) {
- TargetRegisterClass *RC = X86::GR32RegisterClass;
unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
- bool Emitted = TII.copyRegToReg(*FuncInfo.MBB, FuncInfo.InsertPt,
- X86::EBX, Base, RC, RC, DL);
- assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
- Emitted = true;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ X86::EBX).addReg(Base);
}
// Issue the call.
MachineInstrBuilder MIB;
if (CalleeOp) {
// Register-indirect call.
- unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r;
+ unsigned CallOpc;
+ if (Subtarget->isTargetWin64())
+ CallOpc = X86::WINCALL64r;
+ else if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64r;
+ else
+ CallOpc = X86::CALL32r;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
.addReg(CalleeOp);
} else {
// Direct call.
assert(GV && "Not a direct call");
- unsigned CallOpc =
- Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+ unsigned CallOpc;
+ if (Subtarget->isTargetWin64())
+ CallOpc = X86::WINCALL64pcrel32;
+ else if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64pcrel32;
+ else
+ CallOpc = X86::CALLpcrel32;
// See if we need any target-specific flags on the GV operand.
unsigned char OpFlags = 0;
// Now handle call return value (if any).
SmallVector<unsigned, 4> UsedRegs;
- if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
+ if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
EVT CopyVT = RVLocs[0].getValVT();
TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
- TargetRegisterClass *SrcRC = DstRC;
// If this is a call to a function that returns an fp value on the x87 fp
// stack, but where we prefer to use the value in xmm registers, copy it
RVLocs[0].getLocReg() == X86::ST1) &&
isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
CopyVT = MVT::f80;
- SrcRC = X86::RSTRegisterClass;
DstRC = X86::RFP80RegisterClass;
}
unsigned ResultReg = createResultReg(DstRC);
- bool Emitted = TII.copyRegToReg(*FuncInfo.MBB, FuncInfo.InsertPt, ResultReg,
- RVLocs[0].getLocReg(), DstRC, SrcRC, DL);
- assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
- Emitted = true;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(RVLocs[0].getLocReg());
UsedRegs.push_back(RVLocs[0].getLocReg());
if (CopyVT != RVLocs[0].getValVT()) {
}
unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
- EVT VT;
+ MVT VT;
if (!isTypeLegal(C->getType(), VT))
return false;
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default: return false;
case MVT::i8:
Opc = X86::MOV8rm;
else
Opc = X86::LEA64r;
unsigned ResultReg = createResultReg(RC);
- addLeaAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Opc), ResultReg), AM);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), AM);
return ResultReg;
}
return 0;
unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
unsigned ResultReg = createResultReg(RC);
- addLeaAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Opc), ResultReg), AM);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), AM);
return ResultReg;
}
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction. If possible,
+/// try to fold the load as an operand to the instruction, returning true if
+/// possible.
+bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ X86AddressMode AM;
+ if (!X86SelectAddress(LI->getOperand(0), AM))
+ return false;
+
+ X86InstrInfo &XII = (X86InstrInfo&)TII;
+
+ unsigned Size = TD.getTypeAllocSize(LI->getType());
+ unsigned Alignment = LI->getAlignment();
+
+ SmallVector<MachineOperand, 8> AddrOps;
+ AM.getFullAddress(AddrOps);
+
+ MachineInstr *Result =
+ XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
+ if (Result == 0) return false;
+
+ MI->getParent()->insert(MI, Result);
+ MI->eraseFromParent();
+ return true;
+}
+
+
namespace llvm {
llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
return new X86FastISel(funcInfo);