//===----------------------------------------------------------------------===//
#include "X86.h"
+#include "X86ISelLowering.h"
#include "X86InstrBuilder.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
- /// StackPtr - Register used as the stack pointer.
+ /// RegInfo - X86 register info.
///
- unsigned StackPtr;
+ const X86RegisterInfo *RegInfo;
/// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
/// floating point ops.
bool X86ScalarSSEf32;
public:
- explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
+ explicit X86FastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FastISel(funcInfo, libInfo) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
- StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
+ RegInfo = static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
}
virtual bool TargetSelectInstruction(const Instruction *I);
bool X86VisitIntrinsicCall(const IntrinsicInst &I);
bool X86SelectCall(const Instruction *I);
+ bool DoSelectCall(const Instruction *I, const char *MemIntName);
+
const X86InstrInfo *getInstrInfo() const {
return getTargetMachine()->getInstrInfo();
}
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
- bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
+ bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
+
+ bool IsMemcpySmall(uint64_t Len);
bool TryEmitSmallMemcpy(X86AddressMode DestAM,
X86AddressMode SrcAM, uint64_t Len);
} // end anonymous namespace.
-bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
+bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
if (evt == MVT::Other || !evt.isSimple())
// Unhandled type. Halt "fast" selection and bail.
// For now, require SSE/SSE2 for performing floating-point operations,
// since x87 requires additional work.
if (VT == MVT::f64 && !X86ScalarSSEf64)
- return false;
+ return false;
if (VT == MVT::f32 && !X86ScalarSSEf32)
- return false;
+ return false;
// Similarly, no f80 support yet.
if (VT == MVT::f80)
return false;
case MVT::i1:
case MVT::i8:
Opc = X86::MOV8rm;
- RC = X86::GR8RegisterClass;
+ RC = &X86::GR8RegClass;
break;
case MVT::i16:
Opc = X86::MOV16rm;
- RC = X86::GR16RegisterClass;
+ RC = &X86::GR16RegClass;
break;
case MVT::i32:
Opc = X86::MOV32rm;
- RC = X86::GR32RegisterClass;
+ RC = &X86::GR32RegClass;
break;
case MVT::i64:
// Must be in x86-64 mode.
Opc = X86::MOV64rm;
- RC = X86::GR64RegisterClass;
+ RC = &X86::GR64RegClass;
break;
case MVT::f32:
- if (Subtarget->hasSSE1()) {
- Opc = X86::MOVSSrm;
- RC = X86::FR32RegisterClass;
+ if (X86ScalarSSEf32) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
+ RC = &X86::FR32RegClass;
} else {
Opc = X86::LD_Fp32m;
- RC = X86::RFP32RegisterClass;
+ RC = &X86::RFP32RegClass;
}
break;
case MVT::f64:
- if (Subtarget->hasSSE2()) {
- Opc = X86::MOVSDrm;
- RC = X86::FR64RegisterClass;
+ if (X86ScalarSSEf64) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
+ RC = &X86::FR64RegClass;
} else {
Opc = X86::LD_Fp64m;
- RC = X86::RFP64RegisterClass;
+ RC = &X86::RFP64RegClass;
}
break;
case MVT::f80:
default: return false;
case MVT::i1: {
// Mask out all but lowest bit.
- unsigned AndResult = createResultReg(X86::GR8RegisterClass);
+ unsigned AndResult = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
Val = AndResult;
case MVT::i32: Opc = X86::MOV32mr; break;
case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
case MVT::f32:
- Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
+ Opc = X86ScalarSSEf32 ?
+ (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
break;
case MVT::f64:
- Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
+ Opc = X86ScalarSSEf64 ?
+ (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
+ break;
+ case MVT::v4f32:
+ Opc = X86::MOVAPSmr;
+ break;
+ case MVT::v2f64:
+ Opc = X86::MOVAPDmr;
+ break;
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ Opc = X86::MOVDQAmr;
break;
}
case MVT::i32: Opc = X86::MOV32mi; break;
case MVT::i64:
// Must be a 32-bit sign extended value.
- if ((int)CI->getSExtValue() == CI->getSExtValue())
+ if (isInt<32>(CI->getSExtValue()))
Opc = X86::MOV64mi32;
break;
}
unsigned &ResultReg) {
unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
Src, /*TODO: Kill=*/false);
-
- if (RR != 0) {
- ResultReg = RR;
- return true;
- } else
+ if (RR == 0)
return false;
+
+ ResultReg = RR;
+ return true;
}
/// X86SelectAddress - Attempt to fill in an address from the given value.
U = C;
}
- if (const PointerType *Ty = dyn_cast<PointerType>(V->getType()))
+ if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
if (Ty->getAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
i != e; ++i, ++GTI) {
const Value *Op = *i;
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD.getStructLayout(STy);
Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
continue;
}
-
+
// A array/variable index is always of the form i*S where S is the
// constant scale size. See if we can push the scale into immediates.
uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
// Handle constant address.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- // Can't handle alternate code models or TLS yet.
+ // Can't handle alternate code models yet.
if (TM.getCodeModel() != CodeModel::Small)
return false;
+ // Can't handle TLS yet.
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->isThreadLocal())
return false;
-
+
+ // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
+ // it works...).
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ if (const GlobalVariable *GVar =
+ dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)))
+ if (GVar->isThreadLocal())
+ return false;
+
// RIP-relative addresses can't have additional register operands, so if
// we've already folded stuff into the addressing mode, just force the
// global value into its own register, which we can use as the basereg.
if (TLI.getPointerTy() == MVT::i64) {
Opc = X86::MOV64rm;
- RC = X86::GR64RegisterClass;
+ RC = &X86::GR64RegClass;
if (Subtarget->isPICStyleRIPRel())
StubAM.Base.Reg = X86::RIP;
} else {
Opc = X86::MOV32rm;
- RC = X86::GR32RegisterClass;
+ RC = &X86::GR32RegClass;
}
LoadReg = createResultReg(RC);
/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
+ // Atomic stores need special handling.
+ const StoreInst *S = cast<StoreInst>(I);
+
+ if (S->isAtomic())
+ return false;
+
+ unsigned SABIAlignment =
+ TD.getABITypeAlignment(S->getValueOperand()->getType());
+ if (S->getAlignment() != 0 && S->getAlignment() < SABIAlignment)
+ return false;
+
MVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
return false;
bool X86FastISel::X86SelectRet(const Instruction *I) {
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
+ const X86MachineFunctionInfo *X86MFInfo =
+ FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
if (!FuncInfo.CanLowerReturn)
return false;
return false;
// Don't handle popping bytes on return for now.
- if (FuncInfo.MF->getInfo<X86MachineFunctionInfo>()
- ->getBytesToPopOnReturn() != 0)
+ if (X86MFInfo->getBytesToPopOnReturn() != 0)
return 0;
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
return false;
// Let SDISel handle vararg functions.
if (F.isVarArg())
return false;
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
- Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
+ I->getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
const Value *RV = Ret->getOperand(0);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
DstReg).addReg(SrcReg);
- // Mark the register as live out of the function.
- MRI.addLiveOut(VA.getLocReg());
+ // Add register to return instruction.
+ RetRegs.push_back(VA.getLocReg());
+ }
+
+ // The x86-64 ABI for returning structs by value requires that we copy
+ // the sret argument into %rax for the return. We saved the argument into
+ // a virtual register in the entry block, so now we copy the value out
+ // and into %rax.
+ if (Subtarget->is64Bit() && F.hasStructRetAttr()) {
+ unsigned Reg = X86MFInfo->getSRetReturnReg();
+ assert(Reg &&
+ "SRetReturnReg should have been set in LowerFormalArguments()!");
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ X86::RAX).addReg(Reg);
+ RetRegs.push_back(X86::RAX);
}
// Now emit the RET.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
}
/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
+ // Atomic loads need special handling.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
MVT VT;
if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
return false;
}
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
+ bool HasAVX = Subtarget->hasAVX();
+ bool X86ScalarSSEf32 = Subtarget->hasSSE1();
+ bool X86ScalarSSEf64 = Subtarget->hasSSE2();
+
switch (VT.getSimpleVT().SimpleTy) {
default: return 0;
case MVT::i8: return X86::CMP8rr;
case MVT::i16: return X86::CMP16rr;
case MVT::i32: return X86::CMP32rr;
case MVT::i64: return X86::CMP64rr;
- case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0;
- case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0;
+ case MVT::f32:
+ return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
+ case MVT::f64:
+ return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
}
}
bool X86FastISel::X86SelectZExt(const Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
- if (I->getType()->isIntegerTy(8) &&
- I->getOperand(0)->getType()->isIntegerTy(1)) {
- unsigned ResultReg = getRegForValue(I->getOperand(0));
- if (ResultReg == 0) return false;
- // Set the high bits to zero.
- ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
- if (ResultReg == 0) return false;
- UpdateValueMap(I, ResultReg);
- return true;
+ if (!I->getOperand(0)->getType()->isIntegerTy(1))
+ return false;
+
+ EVT DstVT = TLI.getValueType(I->getType());
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ unsigned ResultReg = getRegForValue(I->getOperand(0));
+ if (ResultReg == 0)
+ return false;
+
+ // Set the high bits to zero.
+ ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
+ if (ResultReg == 0)
+ return false;
+
+ if (DstVT != MVT::i8) {
+ ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
+ ResultReg, /*Kill=*/true);
+ if (ResultReg == 0)
+ return false;
}
- return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
}
if (OpReg == 0) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc))
.addReg(OpReg).addImm(1);
-
+
unsigned JmpOpc = X86::JNE_4;
if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
std::swap(TrueMBB, FalseMBB);
JmpOpc = X86::JE_4;
}
-
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc))
.addMBB(TrueMBB);
FastEmitBranch(FalseMBB, DL);
bool X86FastISel::X86SelectFPExt(const Instruction *I) {
// fpext from float to double.
- if (Subtarget->hasSSE2() &&
+ if (X86ScalarSSEf64 &&
I->getType()->isDoubleTy()) {
const Value *V = I->getOperand(0);
if (V->getType()->isFloatTy()) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
- unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
+ unsigned ResultReg = createResultReg(&X86::FR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::CVTSS2SDrr), ResultReg)
.addReg(OpReg);
}
bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
- if (Subtarget->hasSSE2()) {
+ if (X86ScalarSSEf64) {
if (I->getType()->isFloatTy()) {
const Value *V = I->getOperand(0);
if (V->getType()->isDoubleTy()) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
- unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
+ unsigned ResultReg = createResultReg(&X86::FR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::CVTSD2SSrr), ResultReg)
.addReg(OpReg);
}
bool X86FastISel::X86SelectTrunc(const Instruction *I) {
- if (Subtarget->is64Bit())
- // All other cases should be handled by the tblgen generated code.
- return false;
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
- // This code only handles truncation to byte right now.
+ // This code only handles truncation to byte.
if (DstVT != MVT::i8 && DstVT != MVT::i1)
- // All other cases should be handled by the tblgen generated code.
return false;
- if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
- // All other cases should be handled by the tblgen generated code.
+ if (!TLI.isTypeLegal(SrcVT))
return false;
unsigned InputReg = getRegForValue(I->getOperand(0));
// Unhandled operand. Halt "fast" selection and bail.
return false;
- // First issue a copy to GR16_ABCD or GR32_ABCD.
- const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
- ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
- unsigned CopyReg = createResultReg(CopyRC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- CopyReg).addReg(InputReg);
+ if (SrcVT == MVT::i8) {
+ // Truncate from i8 to i1; no code needed.
+ UpdateValueMap(I, InputReg);
+ return true;
+ }
- // Then issue an extract_subreg.
+ if (!Subtarget->is64Bit()) {
+ // If we're on x86-32; we can't extract an i8 from a general register.
+ // First issue a copy to GR16_ABCD or GR32_ABCD.
+ const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ?
+ (const TargetRegisterClass*)&X86::GR16_ABCDRegClass :
+ (const TargetRegisterClass*)&X86::GR32_ABCDRegClass;
+ unsigned CopyReg = createResultReg(CopyRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(InputReg);
+ InputReg = CopyReg;
+ }
+
+ // Issue an extract_subreg.
unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
- CopyReg, /*Kill=*/true,
+ InputReg, /*Kill=*/true,
X86::sub_8bit);
if (!ResultReg)
return false;
return true;
}
+bool X86FastISel::IsMemcpySmall(uint64_t Len) {
+ return Len <= (Subtarget->is64Bit() ? 32 : 16);
+}
+
bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
X86AddressMode SrcAM, uint64_t Len) {
+
// Make sure we don't bloat code by inlining very large memcpy's.
- bool i64Legal = TLI.isTypeLegal(MVT::i64);
- if (Len > (i64Legal ? 32 : 16)) return false;
+ if (!IsMemcpySmall(Len))
+ return false;
+
+ bool i64Legal = Subtarget->is64Bit();
// We don't care about alignment here since we just emit integer accesses.
while (Len) {
case Intrinsic::memcpy: {
const MemCpyInst &MCI = cast<MemCpyInst>(I);
// Don't handle volatile or variable length memcpys.
- if (MCI.isVolatile() || !isa<ConstantInt>(MCI.getLength()))
+ if (MCI.isVolatile())
+ return false;
+
+ if (isa<ConstantInt>(MCI.getLength())) {
+ // Small memcpy's are common enough that we want to do them
+ // without a call if possible.
+ uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
+ if (IsMemcpySmall(Len)) {
+ X86AddressMode DestAM, SrcAM;
+ if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
+ !X86SelectAddress(MCI.getRawSource(), SrcAM))
+ return false;
+ TryEmitSmallMemcpy(DestAM, SrcAM, Len);
+ return true;
+ }
+ }
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MCI.getLength()->getType()->isIntegerTy(SizeWidth))
+ return false;
+
+ if (MCI.getSourceAddressSpace() > 255 || MCI.getDestAddressSpace() > 255)
+ return false;
+
+ return DoSelectCall(&I, "memcpy");
+ }
+ case Intrinsic::memset: {
+ const MemSetInst &MSI = cast<MemSetInst>(I);
+
+ if (MSI.isVolatile())
+ return false;
+
+ unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
+ if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
return false;
- uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
-
- // Get the address of the dest and source addresses.
- X86AddressMode DestAM, SrcAM;
- if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
- !X86SelectAddress(MCI.getRawSource(), SrcAM))
+ if (MSI.getDestAddressSpace() > 255)
return false;
- return TryEmitSmallMemcpy(DestAM, SrcAM, Len);
+ return DoSelectCall(&I, "memset");
}
-
case Intrinsic::stackprotector: {
- // Emit code inline code to store the stack guard onto the stack.
+ // Emit code to store the stack guard onto the stack.
EVT PtrTy = TLI.getPointerTy();
const Value *Op1 = I.getArgOperand(0); // The guard's value.
assert(DI->getAddress() && "Null address should be checked earlier!");
if (!X86SelectAddress(DI->getAddress(), AM))
return false;
- const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
// FIXME may need to add RegState::Debug to any registers produced,
// although ESP/EBP should be the only ones at the moment.
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM).
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow: {
// FIXME: Should fold immediates.
-
+
// Replace "add with overflow" intrinsics with an "add" instruction followed
// by a seto/setc instruction.
const Function *Callee = I.getCalledFunction();
- const Type *RetTy =
+ Type *RetTy =
cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
MVT VT;
return false;
// The call to CreateRegs builds two sequential registers, to store the
- // both the the returned values.
+ // both the returned values.
unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
.addReg(Reg1).addReg(Reg2);
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
return X86VisitIntrinsicCall(*II);
+ // Allow SelectionDAG isel to handle tail calls.
+ if (cast<CallInst>(I)->isTailCall())
+ return false;
+
+ return DoSelectCall(I, 0);
+}
+
+static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
+ const ImmutableCallSite &CS) {
+ if (Subtarget.is64Bit())
+ return 0;
+ if (Subtarget.isTargetWindows())
+ return 0;
+ CallingConv::ID CC = CS.getCallingConv();
+ if (CC == CallingConv::Fast || CC == CallingConv::GHC)
+ return 0;
+ if (!CS.paramHasAttr(1, Attribute::StructRet))
+ return 0;
+ if (CS.paramHasAttr(1, Attribute::InReg))
+ return 0;
+ return 4;
+}
+
+// Select either a call, or an llvm.memcpy/memmove/memset intrinsic
+bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
// Handle only C and fastcc calling conventions for now.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
return false;
- const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
- const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
bool isVarArg = FTy->isVarArg();
// Don't know how to handle Win64 varargs yet. Nothing special needed for
return false;
// Fast-isel doesn't know about callee-pop yet.
- if (Subtarget->IsCalleePop(isVarArg, CC))
+ if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
+ TM.Options.GuaranteedTailCallOpt))
return false;
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
- SmallVector<uint64_t, 4> Offsets;
- GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(),
- Outs, TLI, &Offsets);
+ GetReturnInfo(I->getType(), CS.getAttributes(), Outs, TLI);
bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
- FTy->isVarArg(), Outs, FTy->getContext());
+ *FuncInfo.MF, FTy->isVarArg(),
+ Outs, FTy->getContext());
if (!CanLowerReturn)
return false;
SmallVector<unsigned, 8> Args;
SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
- Args.reserve(CS.arg_size());
- ArgVals.reserve(CS.arg_size());
- ArgVTs.reserve(CS.arg_size());
- ArgFlags.reserve(CS.arg_size());
+ unsigned arg_size = CS.arg_size();
+ Args.reserve(arg_size);
+ ArgVals.reserve(arg_size);
+ ArgVTs.reserve(arg_size);
+ ArgFlags.reserve(arg_size);
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
+ // If we're lowering a mem intrinsic instead of a regular call, skip the
+ // last two arguments, which should not passed to the underlying functions.
+ if (MemIntName && e-i <= 2)
+ break;
Value *ArgVal = *i;
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
+ PointerType *Ty = cast<PointerType>(ArgVal->getType());
+ Type *ElementTy = Ty->getElementType();
+ unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
+ unsigned FrameAlign = CS.getParamAlignment(AttrInd);
+ if (!FrameAlign)
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+ Flags.setByVal();
+ Flags.setByValSize(FrameSize);
+ Flags.setByValAlign(FrameAlign);
+ if (!IsMemcpySmall(FrameSize))
+ return false;
+ }
+
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg))
+ Flags.setInReg();
+ if (CS.paramHasAttr(AttrInd, Attribute::Nest))
+ Flags.setNest();
+
// If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
// instruction. This is safe because it is common to all fastisel supported
// calling conventions on x86.
ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext()));
}
}
-
+
unsigned ArgReg;
-
+
// Passing bools around ends up doing a trunc to i1 and passing it.
// Codegen this as an argument + "and 1".
if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
ArgReg = getRegForValue(ArgVal);
if (ArgReg == 0) return false;
-
+
MVT ArgVT;
if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
-
+
ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
ArgVal->hasOneUse(), 1);
} else {
if (ArgReg == 0) return false;
- // FIXME: Only handle *easy* calls for now.
- if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
- CS.paramHasAttr(AttrInd, Attribute::Nest) ||
- CS.paramHasAttr(AttrInd, Attribute::ByVal))
- return false;
-
- const Type *ArgTy = ArgVal->getType();
+ Type *ArgTy = ArgVal->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT))
return false;
+ if (ArgVT == MVT::x86mmx)
+ return false;
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, TM, ArgLocs, I->getParent()->getContext());
+ CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
+ I->getParent()->getContext());
// Allocate shadow area for Win64
if (Subtarget->isTargetWin64())
unsigned NumBytes = CCInfo.getNextStackOffset();
// Issue CALLSEQ_START
- unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
.addImm(NumBytes);
// Promote the value if needed.
switch (VA.getLocInfo()) {
- default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
break;
}
case CCValAssign::ZExt: {
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
break;
}
case CCValAssign::AExt: {
- // We don't handle MMX parameters yet.
- if (VA.getLocVT().isVector() && VA.getLocVT().getSizeInBits() == 128)
- return false;
+ assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
+ "Unexpected extend");
bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
Arg, ArgVT, Arg);
if (!Emitted)
ArgVT = VA.getLocVT();
break;
}
+ case CCValAssign::VExt:
+ // VExt has not been implemented, so this should be impossible to reach
+ // for now. However, fallback to Selection DAG isel once implemented.
+ return false;
+ case CCValAssign::Indirect:
+ // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
+ // support this.
+ return false;
}
if (VA.isRegLoc()) {
} else {
unsigned LocMemOffset = VA.getLocMemOffset();
X86AddressMode AM;
- AM.Base.Reg = StackPtr;
+ AM.Base.Reg = RegInfo->getStackRegister();
AM.Disp = LocMemOffset;
const Value *ArgVal = ArgVals[VA.getValNo()];
-
- // If this is a really simple value, emit this with the Value* version of
- // X86FastEmitStore. If it isn't simple, we don't want to do this, as it
- // can cause us to reevaluate the argument.
- if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal))
- X86FastEmitStore(ArgVT, ArgVal, AM);
- else
- X86FastEmitStore(ArgVT, Arg, AM);
+ ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()];
+
+ if (Flags.isByVal()) {
+ X86AddressMode SrcAM;
+ SrcAM.Base.Reg = Arg;
+ bool Res = TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize());
+ assert(Res && "memcpy length already checked!"); (void)Res;
+ } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
+ // If this is a really simple value, emit this with the Value* version
+ // of X86FastEmitStore. If it isn't simple, we don't want to do this,
+ // as it can cause us to reevaluate the argument.
+ if (!X86FastEmitStore(ArgVT, ArgVal, AM))
+ return false;
+ } else {
+ if (!X86FastEmitStore(ArgVT, Arg, AM))
+ return false;
+ }
}
}
if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
// Count the number of XMM registers allocated.
- static const unsigned XMMArgRegs[] = {
+ static const uint16_t XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
if (CalleeOp) {
// Register-indirect call.
unsigned CallOpc;
- if (Subtarget->isTargetWin64())
- CallOpc = X86::WINCALL64r;
- else if (Subtarget->is64Bit())
+ if (Subtarget->is64Bit())
CallOpc = X86::CALL64r;
else
CallOpc = X86::CALL32r;
// Direct call.
assert(GV && "Not a direct call");
unsigned CallOpc;
- if (Subtarget->isTargetWin64())
- CallOpc = X86::WINCALL64pcrel32;
- else if (Subtarget->is64Bit())
+ if (Subtarget->is64Bit())
CallOpc = X86::CALL64pcrel32;
else
CallOpc = X86::CALLpcrel32;
}
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
- .addGlobalAddress(GV, 0, OpFlags);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc));
+ if (MemIntName)
+ MIB.addExternalSymbol(MemIntName, OpFlags);
+ else
+ MIB.addGlobalAddress(GV, 0, OpFlags);
}
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
+
// Add an implicit use GOT pointer in EBX.
if (Subtarget->isPICStyleGOT())
- MIB.addReg(X86::EBX);
+ MIB.addReg(X86::EBX, RegState::Implicit);
if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
- MIB.addReg(X86::AL);
+ MIB.addReg(X86::AL, RegState::Implicit);
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
- MIB.addReg(RegArgs[i]);
+ MIB.addReg(RegArgs[i], RegState::Implicit);
// Issue CALLSEQ_END
- unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
- unsigned NumBytesCallee = 0;
- if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet))
- NumBytesCallee = 4;
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
.addImm(NumBytes).addImm(NumBytesCallee);
ComputeValueVTs(TLI, I->getType(), RetTys);
for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
EVT VT = RetTys[i];
- EVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
+ MVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
for (unsigned j = 0; j != NumRegs; ++j) {
ISD::InputArg MyFlags;
- MyFlags.VT = RegisterVT.getSimpleVT();
+ MyFlags.VT = RegisterVT;
MyFlags.Used = !CS.getInstruction()->use_empty();
if (CS.paramHasAttr(0, Attribute::SExt))
MyFlags.Flags.setSExt();
// Now handle call return values.
SmallVector<unsigned, 4> UsedRegs;
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCRetInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
+ CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
+ I->getParent()->getContext());
unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0; i != RVLocs.size(); ++i) {
// stack, but where we prefer to use the value in xmm registers, copy it
// out as F80 and use a truncate to move it from fp stack reg to xmm reg.
if ((RVLocs[i].getLocReg() == X86::ST0 ||
- RVLocs[i].getLocReg() == X86::ST1) &&
- isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
- CopyVT = MVT::f80;
- CopyReg = createResultReg(X86::RFP80RegisterClass);
+ RVLocs[i].getLocReg() == X86::ST1)) {
+ if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
+ CopyVT = MVT::f80;
+ CopyReg = createResultReg(&X86::RFP80RegClass);
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL),
+ CopyReg);
+ } else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(RVLocs[i].getLocReg());
+ UsedRegs.push_back(RVLocs[i].getLocReg());
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- CopyReg).addReg(RVLocs[i].getLocReg());
- UsedRegs.push_back(RVLocs[i].getLocReg());
-
if (CopyVT != RVLocs[i].getValVT()) {
// Round the F80 the right size, which also moves to the appropriate xmm
// register. This is accomplished by storing the F80 value in memory and
unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
MVT VT;
if (!isTypeLegal(C->getType(), VT))
- return false;
+ return 0;
+
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return 0;
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
switch (VT.SimpleTy) {
- default: return false;
+ default: return 0;
case MVT::i8:
Opc = X86::MOV8rm;
- RC = X86::GR8RegisterClass;
+ RC = &X86::GR8RegClass;
break;
case MVT::i16:
Opc = X86::MOV16rm;
- RC = X86::GR16RegisterClass;
+ RC = &X86::GR16RegClass;
break;
case MVT::i32:
Opc = X86::MOV32rm;
- RC = X86::GR32RegisterClass;
+ RC = &X86::GR32RegClass;
break;
case MVT::i64:
// Must be in x86-64 mode.
Opc = X86::MOV64rm;
- RC = X86::GR64RegisterClass;
+ RC = &X86::GR64RegClass;
break;
case MVT::f32:
- if (Subtarget->hasSSE1()) {
- Opc = X86::MOVSSrm;
- RC = X86::FR32RegisterClass;
+ if (X86ScalarSSEf32) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
+ RC = &X86::FR32RegClass;
} else {
Opc = X86::LD_Fp32m;
- RC = X86::RFP32RegisterClass;
+ RC = &X86::RFP32RegClass;
}
break;
case MVT::f64:
- if (Subtarget->hasSSE2()) {
- Opc = X86::MOVSDrm;
- RC = X86::FR64RegisterClass;
+ if (X86ScalarSSEf64) {
+ Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
+ RC = &X86::FR64RegClass;
} else {
Opc = X86::LD_Fp64m;
- RC = X86::RFP64RegisterClass;
+ RC = &X86::RFP64RegClass;
}
break;
case MVT::f80:
// No f80 support yet.
- return false;
+ return 0;
}
// Materialize addresses with LEA instructions.
if (AM.BaseType == X86AddressMode::RegBase &&
AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
return AM.Base.Reg;
-
+
Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
if (!X86SelectAddress(C, AM))
return 0;
unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
- TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg), AM);
unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
MVT VT;
if (!isTypeLegal(CF->getType(), VT))
- return false;
+ return 0;
// Get opcode and regclass for the given zero.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
switch (VT.SimpleTy) {
- default: return false;
- case MVT::f32:
- if (Subtarget->hasSSE1()) {
- Opc = X86::FsFLD0SS;
- RC = X86::FR32RegisterClass;
- } else {
- Opc = X86::LD_Fp032;
- RC = X86::RFP32RegisterClass;
- }
- break;
- case MVT::f64:
- if (Subtarget->hasSSE2()) {
- Opc = X86::FsFLD0SD;
- RC = X86::FR64RegisterClass;
- } else {
- Opc = X86::LD_Fp064;
- RC = X86::RFP64RegisterClass;
- }
- break;
- case MVT::f80:
- // No f80 support yet.
- return false;
+ default: return 0;
+ case MVT::f32:
+ if (X86ScalarSSEf32) {
+ Opc = X86::FsFLD0SS;
+ RC = &X86::FR32RegClass;
+ } else {
+ Opc = X86::LD_Fp032;
+ RC = &X86::RFP32RegClass;
+ }
+ break;
+ case MVT::f64:
+ if (X86ScalarSSEf64) {
+ Opc = X86::FsFLD0SD;
+ RC = &X86::FR64RegClass;
+ } else {
+ Opc = X86::LD_Fp064;
+ RC = &X86::RFP64RegClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return 0;
}
unsigned ResultReg = createResultReg(RC);
if (!X86SelectAddress(LI->getOperand(0), AM))
return false;
- X86InstrInfo &XII = (X86InstrInfo&)TII;
+ const X86InstrInfo &XII = (const X86InstrInfo&)TII;
unsigned Size = TD.getTypeAllocSize(LI->getType());
unsigned Alignment = LI->getAlignment();
namespace llvm {
- llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
- return new X86FastISel(funcInfo);
+ FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) {
+ return new X86FastISel(funcInfo, libInfo);
}
}