SDValue V2);
static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
-
+
bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
-
+
if (TM.getSubtarget<X86Subtarget>().isTargetDarwin()) {
if (is64Bit) return new X8664_MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
return new X8632_ELFTargetObjectFile(TM);
} else if (TM.getSubtarget<X86Subtarget>().isTargetCOFF()) {
return new TargetLoweringObjectFileCOFF();
- }
+ }
llvm_unreachable("unknown subtarget type");
}
setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
+ if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
+ // Setup Windows compiler runtime calls.
+ setLibcallName(RTLIB::SDIV_I64, "_alldiv");
+ setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
+ }
+
if (Subtarget->isTargetDarwin()) {
// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(false);
}
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
- if (!X86ScalarSSEf64) {
+ if (!X86ScalarSSEf64) {
setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
if (Subtarget->is64Bit()) {
// We may not have a libcall for MEMBARRIER so we should lower this.
setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
-
+
// On X86 and X86-64, atomic operations are lowered to locked instructions.
// Locked instructions, in turn, have implicit fence semantics (all memory
// operations are flushed before issuing the locked instruction, and they
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
- if (Subtarget->isTargetCygMing())
+ if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) {
- addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass, false);
+ addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
// No operations on x86mmx supported, everything uses intrinsics.
}
// Do not attempt to promote non-128-bit vectors
if (!VT.is128BitVector())
continue;
-
+
setOperationAction(ISD::AND, SVT, Promote);
AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
setOperationAction(ISD::OR, SVT, Promote);
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT())
return MachineJumpTableInfo::EK_Custom32;
-
+
// Otherwise, use the normal jump table encoding heuristics.
return TargetLowering::getJumpTableEncoding();
}
#include "X86GenCallingConv.inc"
-bool
+bool
X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
}
}
}
-
+
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
}
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
- assert(Reg &&
+ assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
return Ins[0].Flags.isSRet();
}
-/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
-/// given CallingConvention value.
-CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
- if (Subtarget->is64Bit()) {
- if (CC == CallingConv::GHC)
- return CC_X86_64_GHC;
- else if (Subtarget->isTargetWin64())
- return CC_X86_Win64_C;
- else
- return CC_X86_64_C;
- }
-
- if (CC == CallingConv::X86_FastCall)
- return CC_X86_32_FastCall;
- else if (CC == CallingConv::X86_ThisCall)
- return CC_X86_32_ThisCall;
- else if (CC == CallingConv::Fast)
- return CC_X86_32_FastCC;
- else if (CC == CallingConv::GHC)
- return CC_X86_32_GHC;
- else
- return CC_X86_32_C;
-}
-
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
DebugLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
-
+
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile*/false, /*AlwaysInline=*/true,
MachinePointerInfo(), MachinePointerInfo());
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
+ CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
unsigned LastVal = ~0U;
SDValue ArgValue;
TotalNumXMMRegs = 0;
if (IsWin64) {
+ const TargetFrameInfo &TFI = *getTargetMachine().getFrameInfo();
+ // Get to the caller-allocated home save location. Add 8 to account
+ // for the return address.
+ int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
FuncInfo->setRegSaveFrameIndex(
- MFI->CreateFixedObject(1, NumIntRegs * 8, false));
+ MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
} else {
// For X86-64, if there are vararg parameters that are passed via
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+ CCInfo.AnalyzeCallOperands(Outs, CC_X86);
if (CCInfo.getNextStackOffset()) {
MachineFunction &MF = DAG.getMachineFunction();
if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
}
}
+ // An stdcall caller is expected to clean up its arguments; the callee
+ // isn't going to do that. PR 8461.
+ if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+ return false;
+
return true;
}
static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool hasSSSE3) {
int i, e = VT.getVectorNumElements();
-
+
// Do not handle v2i64 / v2f64 shuffles with palignr.
if (e < 4 || !hasSSSE3)
return false;
-
+
for (i = 0; i != e; ++i)
if (Mask[i] >= 0)
break;
-
+
// All undef, not a palignr.
if (i == e)
return false;
bool NeedsUnary = false;
int s = Mask[i] - i;
-
+
// Check the rest of the elements to see if they are consecutive.
for (++i; i != e; ++i) {
int m = Mask[i];
- if (m < 0)
+ if (m < 0)
continue;
-
+
Unary = Unary && (m < (int)e);
NeedsUnary = NeedsUnary || (m < s);
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
unsigned NumElems = N->getValueType(0).getVectorNumElements();
-
+
if (NumElems != 4)
return false;
-
+
return isUndefOrEqual(N->getMaskElt(0), 2) &&
isUndefOrEqual(N->getMaskElt(1), 3) &&
isUndefOrEqual(N->getMaskElt(2), 2) &&
SDValue
X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
SelectionDAG &DAG) const {
-
+
// Check if the scalar load can be widened into a vector load. And if
// the address is "base + cst" see if the cst can be "absorbed" into
// the shuffle mask.
return SDValue();
}
-/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
-/// vector of type 'VT', see if the elements can be replaced by a single large
+/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
+/// vector of type 'VT', see if the elements can be replaced by a single large
/// load which has the same value as a build_vector whose operands are 'elts'.
///
/// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
-///
+///
/// FIXME: we'd also like to handle the case where the last elements are zero
/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
/// There's even a handy isZeroNode for that purpose.
DebugLoc &DL, SelectionDAG &DAG) {
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = Elts.size();
-
+
LoadSDNode *LDBase = NULL;
unsigned LastLoadedElt = -1U;
-
+
// For each element in the initializer, see if we've found a load or an undef.
- // If we don't find an initial load element, or later load elements are
+ // If we don't find an initial load element, or later load elements are
// non-consecutive, bail out.
for (unsigned i = 0; i < NumElems; ++i) {
SDValue Elt = Elts[i];
-
+
if (!Elt.getNode() ||
(Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
return SDValue();
// Check for a build vector of consecutive loads.
for (unsigned i = 0; i < NumElems; ++i)
V[i] = Op.getOperand(i);
-
+
// Check for elements which are consecutive loads.
SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
if (LD.getNode())
return LD;
-
- // For SSE 4.1, use insertps to put the high elements into the low element.
+
+ // For SSE 4.1, use insertps to put the high elements into the low element.
if (getSubtarget()->hasSSE41()) {
SDValue Result;
if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
else
Result = DAG.getUNDEF(VT);
-
+
for (unsigned i = 1; i < NumElems; ++i) {
if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
}
return Result;
}
-
+
// Otherwise, expand into a number of unpckl*, start by extending each of
// our (non-undef) elements to the full vector width with the element in the
// bottom slot of the vector (which generates no code for SSE).
if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
EltStride == NumElems/2)
continue;
-
+
V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
}
EltStride >>= 1;
// movssrr and movsdrr do not clear top bits. Try to use movd, movq
// instead.
MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
- if ((ExtVT.SimpleTy != MVT::i64 || Subtarget->is64Bit()) &&
+ if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) &&
SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
SDValue
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
-
+
if (Op.getValueType() == MVT::v1i64 &&
Op.getOperand(0).getValueType() == MVT::i64)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
const EVT PtrVT, TLSModel::Model model,
bool is64Bit) {
DebugLoc dl = GA->getDebugLoc();
-
+
// Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
is64Bit ? 257 : 256));
- SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getIntPtrConstant(0),
MachinePointerInfo(Ptr), false, false, 0);
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
-
+
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
if (Subtarget->isTargetELF()) {
// TODO: implement the "local dynamic" model
// TODO: implement the "initial exec"model for pic executables
-
+
// If GV is an alias then use the aliasee for determining
// thread-localness.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GV = GA->resolveAliasedGlobal(false);
-
- TLSModel::Model model
+
+ TLSModel::Model model
= getTLSModel(GV, getTargetMachine().getRelocationModel());
-
+
switch (model) {
case TLSModel::GeneralDynamic:
case TLSModel::LocalDynamic: // not implemented
if (Subtarget->is64Bit())
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
-
+
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
unsigned char OpFlag = 0;
unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
X86ISD::WrapperRIP : X86ISD::Wrapper;
-
+
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) &&
OpFlag = X86II::MO_TLVP_PIC_BASE;
else
OpFlag = X86II::MO_TLVP;
- DebugLoc DL = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
getPointerTy(),
GA->getOffset(), OpFlag);
SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
-
+
// With PIC32, the address is actually $g + Offset.
if (PIC32)
Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
DebugLoc(), getPointerTy()),
Offset);
-
+
// Lowering the machine isd will make sure everything is in the right
// location.
SDValue Args[] = { Offset };
SDValue Chain = DAG.getNode(X86ISD::TLSCALL, DL, MVT::Other, Args, 1);
-
+
// TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setAdjustsStack(true);
unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
}
-
+
assert(false &&
"TLS not implemented for this target.");
}
SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
- SDValue StackSlot,
+ SDValue StackSlot,
SelectionDAG &DAG) const {
// Build the FILD
DebugLoc DL = Op.getDebugLoc();
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
-
+
unsigned ByteSize = SrcVT.getSizeInBits()/8;
-
+
int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
MachineMemOperand *MMO =
DAG.getMachineFunction()
.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOLoad, ByteSize, ByteSize);
-
+
SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
X86ISD::FILD, DL,
DAG.getMachineFunction()
.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOStore, SSFISize, SSFISize);
-
+
Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
Ops, array_lengthof(Ops),
Op.getValueType(), MMO);
DAG.getMachineFunction()
.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOLoad, 8, 8);
-
+
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3,
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
-
-
+
+
unsigned Opc;
switch (DstTy.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
SDValue Ops[] = {
Chain, StackSlot, DAG.getValueType(TheVT)
};
-
+
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOLoad, MemSize, MemSize);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
}
-
+
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOStore, MemSize, MemSize);
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
- if (C && C->getAPIntValue() == 1)
+ if (C && C->getAPIntValue() == 1)
Cond = Cond.getOperand(0);
}
// We know the result of AND is compared against zero. Try to match
// it to BT.
- if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
if (NewSetCC.getNode()) {
CC = NewSetCC.getOperand(0);
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
- if (C && C->getAPIntValue() == 1)
+ if (C && C->getAPIntValue() == 1)
Cond = Cond.getOperand(0);
}
// We know the result of AND is compared against zero. Try to match
// it to BT.
- if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
if (NewSetCC.getNode()) {
CC = NewSetCC.getOperand(0);
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
- assert(Subtarget->isTargetCygMing() &&
- "This should be used only on Cygwin/Mingw targets");
+ assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) &&
+ "This should be used only on Windows targets");
DebugLoc dl = Op.getDebugLoc();
// Get the inputs.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Chain = DAG.getNode(X86ISD::MINGW_ALLOCA, dl, NodeTys, Chain, Flag);
+ Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
Flag = Chain.getValue(1);
Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);
}
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
- // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
- assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
+ assert(Subtarget->is64Bit() &&
+ "LowerVAARG only handles 64-bit va_arg!");
+ assert((Subtarget->isTargetLinux() ||
+ Subtarget->isTargetDarwin()) &&
+ "Unhandled target in LowerVAARG");
+ assert(Op.getNode()->getNumOperands() == 4);
+ SDValue Chain = Op.getOperand(0);
+ SDValue SrcPtr = Op.getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ unsigned Align = Op.getConstantOperandVal(3);
+ DebugLoc dl = Op.getDebugLoc();
- report_fatal_error("VAArgInst is not yet implemented for x86-64!");
- return SDValue();
+ EVT ArgVT = Op.getNode()->getValueType(0);
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
+ uint8_t ArgMode;
+
+ // Decide which area this value should be read from.
+ // TODO: Implement the AMD64 ABI in its entirety. This simple
+ // selection mechanism works only for the basic types.
+ if (ArgVT == MVT::f80) {
+ llvm_unreachable("va_arg for f80 not yet implemented");
+ } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
+ ArgMode = 2; // Argument passed in XMM register. Use fp_offset.
+ } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
+ ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset.
+ } else {
+ llvm_unreachable("Unhandled argument type in LowerVAARG");
+ }
+
+ if (ArgMode == 2) {
+ // Sanity Check: Make sure using fp_offset makes sense.
+ assert(!UseSoftFloat &&
+ !(DAG.getMachineFunction()
+ .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
+ Subtarget->hasSSE1());
+ }
+
+ // Insert VAARG_64 node into the DAG
+ // VAARG_64 returns two values: Variable Argument Address, Chain
+ SmallVector<SDValue, 11> InstOps;
+ InstOps.push_back(Chain);
+ InstOps.push_back(SrcPtr);
+ InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32));
+ InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8));
+ InstOps.push_back(DAG.getConstant(Align, MVT::i32));
+ SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
+ SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
+ VTs, &InstOps[0], InstOps.size(),
+ MVT::i64,
+ MachinePointerInfo(SV),
+ /*Align=*/0,
+ /*Volatile=*/false,
+ /*ReadMem=*/true,
+ /*WriteMem=*/true);
+ Chain = VAARG.getValue(1);
+
+ // Load the next argument and return it
+ return DAG.getLoad(ArgVT, dl,
+ Chain,
+ VAARG,
+ MachinePointerInfo(),
+ false, false, 0);
}
SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
- false,
+ false,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
-
+
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOStore, 2, 2);
-
+
SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
DAG.getVTList(MVT::Other),
Op.getOperand(1), DAG.getConstant(23, MVT::i32));
ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
-
+
std::vector<Constant*> CV(4, CI);
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
R, M, Op);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-
+
C = ConstantVector::get(CVM2);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, 16);
-
+
// r = pblendv(r, psllw(r & (char16)63, 2), a);
M = DAG.getNode(ISD::AND, dl, VT, R, M);
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
R, M, Op);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-
+
// return pblendv(r, r+r, a);
R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
DebugLoc dl = Op.getDebugLoc();
-
+
if (!Subtarget->hasSSE2()) {
SDValue Chain = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0,
+ SDValue Zero = DAG.getConstant(0,
Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
SDValue Ops[] = {
DAG.getRegister(X86::ESP, MVT::i32), // Base
Zero,
Chain
};
- SDNode *Res =
+ SDNode *Res =
DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
array_lengthof(Ops));
return SDValue(Res, 0);
}
-
+
unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
if (!isDev)
return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
-
+
unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
-
+
// def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
if (!Op1 && !Op2 && !Op3 && Op4)
return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
-
+
// def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
if (Op1 && !Op2 && !Op3 && !Op4)
return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
-
- // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
+
+ // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
// (MFENCE)>;
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
}
SelectionDAG &DAG) const {
EVT SrcVT = Op.getOperand(0).getValueType();
EVT DstVT = Op.getValueType();
- assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
+ assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
Subtarget->hasMMX() && !DisableMMX) &&
"Unexpected custom BIT_CONVERT");
- assert((DstVT == MVT::i64 ||
+ assert((DstVT == MVT::i64 ||
(DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
"Unexpected custom BIT_CONVERT");
// i64 <=> MMX conversions are Legal.
N->getOperand(1),
swapInH.getValue(1) };
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, 3);
+ MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
+ SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys,
+ Ops, 3, T, MMO);
SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX,
MVT::i32, Result.getValue(1));
SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX,
case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
- case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA";
+ case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
+ case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
}
}
return BB;
}
+MachineBasicBlock *
+X86TargetLowering::EmitVAARG64WithCustomInserter(
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ // Emit va_arg instruction on X86-64.
+
+ // Operands to this pseudo-instruction:
+ // 0 ) Output : destination address (reg)
+ // 1-5) Input : va_list address (addr, i64mem)
+ // 6 ) ArgSize : Size (in bytes) of vararg type
+ // 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
+ // 8 ) Align : Alignment of type
+ // 9 ) EFLAGS (implicit-def)
+
+ assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
+ assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+
+ unsigned DestReg = MI->getOperand(0).getReg();
+ MachineOperand &Base = MI->getOperand(1);
+ MachineOperand &Scale = MI->getOperand(2);
+ MachineOperand &Index = MI->getOperand(3);
+ MachineOperand &Disp = MI->getOperand(4);
+ MachineOperand &Segment = MI->getOperand(5);
+ unsigned ArgSize = MI->getOperand(6).getImm();
+ unsigned ArgMode = MI->getOperand(7).getImm();
+ unsigned Align = MI->getOperand(8).getImm();
+
+ // Memory Reference
+ assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ // Machine Information
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
+ const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
+ DebugLoc DL = MI->getDebugLoc();
+
+ // struct va_list {
+ // i32 gp_offset
+ // i32 fp_offset
+ // i64 overflow_area (address)
+ // i64 reg_save_area (address)
+ // }
+ // sizeof(va_list) = 24
+ // alignment(va_list) = 8
+
+ unsigned TotalNumIntRegs = 6;
+ unsigned TotalNumXMMRegs = 8;
+ bool UseGPOffset = (ArgMode == 1);
+ bool UseFPOffset = (ArgMode == 2);
+ unsigned MaxOffset = TotalNumIntRegs * 8 +
+ (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
+
+ /* Align ArgSize to a multiple of 8 */
+ unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
+ bool NeedsAlign = (Align > 8);
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *overflowMBB;
+ MachineBasicBlock *offsetMBB;
+ MachineBasicBlock *endMBB;
+
+ unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB
+ unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB
+ unsigned OffsetReg = 0;
+
+ if (!UseGPOffset && !UseFPOffset) {
+ // If we only pull from the overflow region, we don't create a branch.
+ // We don't need to alter control flow.
+ OffsetDestReg = 0; // unused
+ OverflowDestReg = DestReg;
+
+ offsetMBB = NULL;
+ overflowMBB = thisMBB;
+ endMBB = thisMBB;
+ } else {
+ // First emit code to check if gp_offset (or fp_offset) is below the bound.
+ // If so, pull the argument from reg_save_area. (branch to offsetMBB)
+ // If not, pull from overflow_area. (branch to overflowMBB)
+ //
+ // thisMBB
+ // | .
+ // | .
+ // offsetMBB overflowMBB
+ // | .
+ // | .
+ // endMBB
+
+ // Registers for the PHI in endMBB
+ OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
+ OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
+
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *MF = MBB->getParent();
+ overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ // Insert the new basic blocks
+ MF->insert(MBBIter, offsetMBB);
+ MF->insert(MBBIter, overflowMBB);
+ MF->insert(MBBIter, endMBB);
+
+ // Transfer the remainder of MBB and its successor edges to endMBB.
+ endMBB->splice(endMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ thisMBB->end());
+ endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+ // Make offsetMBB and overflowMBB successors of thisMBB
+ thisMBB->addSuccessor(offsetMBB);
+ thisMBB->addSuccessor(overflowMBB);
+
+ // endMBB is a successor of both offsetMBB and overflowMBB
+ offsetMBB->addSuccessor(endMBB);
+ overflowMBB->addSuccessor(endMBB);
+
+ // Load the offset value into a register
+ OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Check if there is enough room left to pull this argument.
+ BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
+ .addReg(OffsetReg)
+ .addImm(MaxOffset + 8 - ArgSizeA8);
+
+ // Branch to "overflowMBB" if offset >= max
+ // Fall through to "offsetMBB" otherwise
+ BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
+ .addMBB(overflowMBB);
+ }
+
+ // In offsetMBB, emit code to use the reg_save_area.
+ if (offsetMBB) {
+ assert(OffsetReg != 0);
+
+ // Read the reg_save_area address.
+ unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 16)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Zero-extend the offset
+ unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+ .addImm(0)
+ .addReg(OffsetReg)
+ .addImm(X86::sub_32bit);
+
+ // Add the offset to the reg_save_area to get the final address.
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+ .addReg(OffsetReg64)
+ .addReg(RegSaveReg);
+
+ // Compute the offset for the next argument
+ unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+ BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
+ .addReg(OffsetReg)
+ .addImm(UseFPOffset ? 16 : 8);
+
+ // Store it back into the va_list.
+ BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .addOperand(Segment)
+ .addReg(NextOffsetReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // Jump to endMBB
+ BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
+ .addMBB(endMBB);
+ }
+
+ //
+ // Emit code to use overflow area
+ //
+
+ // Load the overflow_area address into a register.
+ unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we need to align it, do so. Otherwise, just copy the address
+ // to OverflowDestReg.
+ if (NeedsAlign) {
+ // Align the overflow address
+ assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
+ unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
+
+ // aligned_addr = (addr + (align-1)) & ~(align-1)
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
+ .addReg(OverflowAddrReg)
+ .addImm(Align-1);
+
+ BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
+ .addReg(TmpReg)
+ .addImm(~(uint64_t)(Align-1));
+ } else {
+ BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
+ .addReg(OverflowAddrReg);
+ }
+
+ // Compute the next overflow address after this argument.
+ // (the overflow address should be kept 8-byte aligned)
+ unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
+ BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
+ .addReg(OverflowDestReg)
+ .addImm(ArgSizeA8);
+
+ // Store the new overflow address.
+ BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
+ .addOperand(Base)
+ .addOperand(Scale)
+ .addOperand(Index)
+ .addDisp(Disp, 8)
+ .addOperand(Segment)
+ .addReg(NextAddrReg)
+ .setMemRefs(MMOBegin, MMOEnd);
+
+ // If we branched, emit the PHI to the front of endMBB.
+ if (offsetMBB) {
+ BuildMI(*endMBB, endMBB->begin(), DL,
+ TII->get(X86::PHI), DestReg)
+ .addReg(OffsetDestReg).addMBB(offsetMBB)
+ .addReg(OverflowDestReg).addMBB(overflowMBB);
+ }
+
+ // Erase the pseudo instruction
+ MI->eraseFromParent();
+
+ return endMBB;
+}
+
MachineBasicBlock *
X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MachineInstr *MI,
}
MachineBasicBlock *
-X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
+X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// FIXME: The code should be tweaked as soon as we'll try to do codegen for
// mingw-w64.
+ const char *StackProbeSymbol =
+ Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
+
BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca")
+ .addExternalSymbol(StackProbeSymbol)
.addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
// our load from the relocation, sticking it in either RDI (x86-64)
// or EAX and doing an indirect call. The return value will then
// be in the normal return register.
- const X86InstrInfo *TII
+ const X86InstrInfo *TII
= static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
MachineFunction *F = BB->getParent();
assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
assert(MI->getOperand(3).isGlobal() && "This should be a global");
-
+
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
.addImm(0).addReg(0)
- .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
.addImm(0).addReg(0)
- .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
TII->get(X86::MOV32rm), X86::EAX)
.addReg(TII->getGlobalBaseReg(F))
.addImm(0).addReg(0)
- .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
}
-
+
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
- case X86::MINGW_ALLOCA:
- return EmitLoweredMingwAlloca(MI, BB);
+ case X86::WIN_ALLOCA:
+ return EmitLoweredWinAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
false);
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
+
+ case X86::VAARG_64:
+ return EmitVAARG64WithCustomInserter(MI, BB);
}
}
// SETCC_CARRY sets the dest to ~0 for true or 0 for false.
if (Op.getOpcode() == X86ISD::SETCC_CARRY)
return Op.getValueType().getScalarType().getSizeInBits();
-
+
// Fallback case.
return 1;
}
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
- std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
+ InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
std::string AsmStr = IA->getAsmString();
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
- case 'A':
- return C_Register;
- case 'f':
- case 'r':
case 'R':
- case 'l':
case 'q':
case 'Q':
- case 'x':
+ case 'f':
+ case 't':
+ case 'u':
case 'y':
+ case 'x':
case 'Y':
return C_RegisterClass;
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ return C_Register;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'G':
+ case 'C':
case 'e':
case 'Z':
return C_Other;
return TargetLowering::getConstraintType(Constraint);
}
-/// Examine constraint type and operand type and determine a weight value,
-/// where: -1 = invalid match, and 0 = so-so match to 3 = good match.
+/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
-int X86TargetLowering::getSingleConstraintMatchWeight(
+TargetLowering::ConstraintWeight
+ X86TargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
- int weight = -1;
+ ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (CallOperandVal == NULL)
- return 0;
+ return CW_Default;
+ const Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
- return TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ case 'R':
+ case 'q':
+ case 'Q':
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'S':
+ case 'D':
+ case 'A':
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'f':
+ case 't':
+ case 'u':
+ if (type->isFloatingPointTy())
+ weight = CW_SpecificReg;
+ break;
+ case 'y':
+ if (type->isX86_MMXTy() && !DisableMMX && Subtarget->hasMMX())
+ weight = CW_SpecificReg;
+ break;
+ case 'x':
+ case 'Y':
+ if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1())
+ weight = CW_Register;
break;
case 'I':
if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
if (C->getZExtValue() <= 31)
- weight = 3;
+ weight = CW_Constant;
+ }
+ break;
+ case 'J':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 63)
+ weight = CW_Constant;
+ }
+ break;
+ case 'K':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
+ weight = CW_Constant;
+ }
+ break;
+ case 'L':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
+ weight = CW_Constant;
+ }
+ break;
+ case 'M':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 3)
+ weight = CW_Constant;
+ }
+ break;
+ case 'N':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xff)
+ weight = CW_Constant;
+ }
+ break;
+ case 'G':
+ case 'C':
+ if (dyn_cast<ConstantFP>(CallOperandVal)) {
+ weight = CW_Constant;
+ }
+ break;
+ case 'e':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -0x80000000LL) &&
+ (C->getSExtValue() <= 0x7fffffffLL))
+ weight = CW_Constant;
+ }
+ break;
+ case 'Z':
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() <= 0xffffffff)
+ weight = CW_Constant;
}
break;
- // etc.
}
return weight;
}